diff --git a/packages/muelu/src/Graph/Containers/MueLu_LWGraphBase.hpp b/packages/muelu/src/Graph/Containers/MueLu_LWGraphBase.hpp index 367f31615ad9..bc82ebe30188 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LWGraphBase.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LWGraphBase.hpp @@ -298,6 +298,10 @@ class LWGraphBase { return graph; } + const std::string& getObjectLabel() const { + return objectLabel_; + } + private: //! Underlying graph (with label) mutable local_graph_type graph_; diff --git a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_decl.hpp b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_decl.hpp index 5ced774b6221..d7ced9a9e99d 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_decl.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_decl.hpp @@ -15,6 +15,7 @@ #include "MueLu_LWGraphBase.hpp" #include "MueLu_LWGraph_kokkos_fwd.hpp" +#include "MueLu_LWGraph_fwd.hpp" namespace MueLu { @@ -28,6 +29,9 @@ namespace MueLu { template class LWGraph_kokkos : public MueLu::LWGraphBase { using LWGraphBase::LWGraphBase; + + public: + RCP > copyToHost(); }; } // namespace MueLu diff --git a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_def.hpp b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_def.hpp index de35f71aa071..aaac670fc588 100644 --- a/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_def.hpp +++ b/packages/muelu/src/Graph/Containers/MueLu_LWGraph_kokkos_def.hpp @@ -10,10 +10,35 @@ #ifndef MUELU_LWGRAPH_KOKKOS_DEF_HPP #define MUELU_LWGRAPH_KOKKOS_DEF_HPP +#include "MueLu_LWGraph.hpp" #include "MueLu_LWGraph_kokkos_decl.hpp" namespace MueLu { +template +RCP > MueLu::LWGraph_kokkos::copyToHost() { + auto graph = this->getGraph(); + + auto row_map_h = Kokkos::create_mirror_view(graph.row_map); + auto entries_h = Kokkos::create_mirror_view(graph.entries); + Kokkos::deep_copy(row_map_h, graph.row_map); + Kokkos::deep_copy(entries_h, graph.entries); + + using local_graph_type_host = typename MueLu::LWGraphBase::local_graph_type; + auto graph_h = local_graph_type_host(entries_h, row_map_h); + + auto lw_h = rcp(new MueLu::LWGraph(graph_h, this->GetDomainMap(), this->GetImportMap(), this->getObjectLabel())); + + using bndry_nodes_type = typename MueLu::LWGraphBase::boundary_nodes_type; + + auto bndry = this->GetBoundaryNodeMap(); + auto bndry_h = bndry_nodes_type("boundary_nodes", bndry.extent(0)); + Kokkos::deep_copy(bndry_h, bndry); + lw_h->SetBoundaryNodeMap(bndry_h); + + return lw_h; +} + } // namespace MueLu #endif // MUELU_LWGRAPH_KOKKOS_DEF_HPP diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp index 949f8b4df50a..0b7e1d89d800 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp @@ -170,7 +170,6 @@ void CoalesceDropFactory::Build(Level typename STS::magnitudeType rowSumTol = as(pL.get("aggregation: row sum drop tol")); RCP ghostedBlockNumber; - ArrayRCP g_block_id; if (algo == "distance laplacian") { // Grab the coordinates for distance laplacian @@ -193,7 +192,6 @@ void CoalesceDropFactory::Build(Level } else { ghostedBlockNumber = BlockNumber; } - g_block_id = ghostedBlockNumber->getData(0); // } if (algo == "block diagonal colored signed classical") generateColoringGraph = true; @@ -443,11 +441,13 @@ void CoalesceDropFactory::Build(Level // RS style needs the max negative off-diagonal, SA style needs the diagonal if (useSignedClassicalRS) { if (ghostedBlockNumber.is_null()) { - negMaxOffDiagonal = MueLu::Utilities::GetMatrixMaxMinusOffDiagonal(*A); + auto negMaxOffDiagonalVec = MueLu::Utilities::GetMatrixMaxMinusOffDiagonal(*A); + negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0); if (GetVerbLevel() & Statistics1) GetOStream(Statistics1) << "Calculated max point off-diagonal" << std::endl; } else { - negMaxOffDiagonal = MueLu::Utilities::GetMatrixMaxMinusOffDiagonal(*A, *ghostedBlockNumber); + auto negMaxOffDiagonalVec = MueLu::Utilities::GetMatrixMaxMinusOffDiagonal(*A, *ghostedBlockNumber); + negMaxOffDiagonal = negMaxOffDiagonalVec->getData(0); if (GetVerbLevel() & Statistics1) GetOStream(Statistics1) << "Calculating max block off-diagonal" << std::endl; } @@ -468,6 +468,10 @@ void CoalesceDropFactory::Build(Level } } + ArrayRCP g_block_id; + if (!ghostedBlockNumber.is_null()) + g_block_id = ghostedBlockNumber->getData(0); + LO realnnz = 0; rows(0) = 0; for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp index ea95497b33dd..8696993bde33 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_kokkos_def.hpp @@ -181,7 +181,7 @@ class ScalarFunctor { LO col = rowView.colidx(colID); impl_Scalar val = rowView.value(colID); - if (!dropFunctor(row, col, rowView.value(colID)) || row == col) { + if ((!bndNodes(row) && !dropFunctor(row, col, rowView.value(colID))) || row == col) { colsAux(offset + rownnz) = col; LO valID = (reuseGraph ? colID : rownnz); @@ -216,7 +216,7 @@ class ScalarFunctor { // boundaryNodes[row] = true; // We do not do it this way now because there is no framework for distinguishing isolated // and boundary nodes in the aggregation algorithms - bndNodes(row) = (rownnz == 1 && aggregationMayCreateDirichlet); + bndNodes(row) |= (rownnz == 1 && aggregationMayCreateDirichlet); nnz += rownnz; } @@ -521,6 +521,9 @@ void CoalesceDropFactory_kokkos:: } else if (blkSize == 1 && threshold != zero) { // Scalar problem with dropping + // Detect and record rows that correspond to Dirichlet boundary conditions + boundaryNodes = Utilities::DetectDirichletRows_kokkos(*A, dirichletThreshold); + typedef typename Matrix::local_matrix_type local_matrix_type; typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type; typedef typename kokkos_graph_type::row_map_type::non_const_type rows_type; @@ -566,8 +569,6 @@ void CoalesceDropFactory_kokkos:: valsAux = vals_type(Kokkos::ViewAllocateWithoutInitializing("FA_aux_vals"), nnzA); } - typename boundary_nodes_type::non_const_type bndNodes(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numRows); - LO nnzFA = 0; { if (algo == "classical") { @@ -587,8 +588,8 @@ void CoalesceDropFactory_kokkos:: auto ghostedDiagView = ghostedDiag->getDeviceLocalView(Xpetra::Access::ReadWrite); CoalesceDrop_Kokkos_Details::ClassicalDropFunctor dropFunctor(ghostedDiagView, threshold); - CoalesceDrop_Kokkos_Details::ScalarFunctor - scalarFunctor(kokkosMatrix, bndNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold, aggregationMayCreateDirichlet); + CoalesceDrop_Kokkos_Details::ScalarFunctor + scalarFunctor(kokkosMatrix, boundaryNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold, aggregationMayCreateDirichlet); Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:main_loop", range_type(0, numRows), scalarFunctor, nnzFA); @@ -659,8 +660,8 @@ void CoalesceDropFactory_kokkos:: CoalesceDrop_Kokkos_Details::DistanceLaplacianDropFunctor dropFunctor(ghostedLaplDiagView, distFunctor, threshold); - CoalesceDrop_Kokkos_Details::ScalarFunctor - scalarFunctor(kokkosMatrix, bndNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold, true); + CoalesceDrop_Kokkos_Details::ScalarFunctor + scalarFunctor(kokkosMatrix, boundaryNodes, dropFunctor, rows, colsAux, valsAux, reuseGraph, lumping, threshold, true); Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:main_loop", range_type(0, numRows), scalarFunctor, nnzFA); @@ -669,8 +670,6 @@ void CoalesceDropFactory_kokkos:: } numDropped = nnzA - nnzFA; - boundaryNodes = bndNodes; - { SubFactoryMonitor m2(*this, "CompressRows", currentLevel); diff --git a/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_decl.hpp b/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_decl.hpp index 59c5056a3be1..c695ce7fdf77 100644 --- a/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_decl.hpp +++ b/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_decl.hpp @@ -18,6 +18,7 @@ #include #include "MueLu_LWGraph_fwd.hpp" +#include "MueLu_LWGraph_kokkos_fwd.hpp" #include "MueLu_Exceptions.hpp" #include "MueLu_SingleLevelFactoryBase.hpp" diff --git a/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_def.hpp b/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_def.hpp index 33b8118288d8..b432ffb1d868 100644 --- a/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_def.hpp @@ -24,6 +24,7 @@ #include "MueLu_Aggregates.hpp" #include "MueLu_LWGraph.hpp" +#include "MueLu_LWGraph_kokkos.hpp" #include "MueLu_Level.hpp" #include "MueLu_MasterList.hpp" #include "MueLu_Monitor.hpp" @@ -60,10 +61,10 @@ RCP NotayAggregationFactoryset >("A", null, "Generating factory of the matrix"); - validParamList->set >("Graph", null, "Generating factory of the graph"); - validParamList->set >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'"); - validParamList->set >("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'"); + validParamList->set>("A", null, "Generating factory of the matrix"); + validParamList->set>("Graph", null, "Generating factory of the graph"); + validParamList->set>("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'"); + validParamList->set>("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'"); return validParamList; } @@ -113,8 +114,14 @@ void NotayAggregationFactory::Build(L "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\"" " must be a strictly positive integer"); - RCP graph = Get >(currentLevel, "Graph"); - RCP A = Get >(currentLevel, "A"); + RCP graph; + if (IsType>(currentLevel, "Graph")) + graph = Get>(currentLevel, "Graph"); + else { + auto graph_k = Get>(currentLevel, "Graph"); + graph = graph_k->copyToHost(); + } + RCP A = Get>(currentLevel, "A"); // Setup aggregates & aggStat objects RCP aggregates = rcp(new Aggregates(*graph)); @@ -161,8 +168,8 @@ void NotayAggregationFactory::Build(L if (ordering == O_RANDOM) MueLu::NotayUtils::RandomReorder(orderingVector); else if (ordering == O_CUTHILL_MCKEE) { - RCP > rcmVector = MueLu::Utilities::CuthillMcKee(*A); - auto localVector = rcmVector->getData(0); + RCP> rcmVector = MueLu::Utilities::CuthillMcKee(*A); + auto localVector = rcmVector->getData(0); for (LO i = 0; i < numRows; i++) orderingVector[i] = localVector[i]; } @@ -198,7 +205,7 @@ void NotayAggregationFactory::Build(L // Directly compute rowsum from A, rather than coarseA row_sum_type rowSum("rowSum", numLocalAggregates); { - std::vector > agg2vertex(numLocalAggregates); + std::vector> agg2vertex(numLocalAggregates); auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0); for (LO i = 0; i < (LO)numRows; i++) { if (aggStat[i] != AGGREGATED) @@ -248,8 +255,8 @@ void NotayAggregationFactory::Build(L if (ordering == O_RANDOM) MueLu::NotayUtils::RandomReorder(localOrderingVector); else if (ordering == O_CUTHILL_MCKEE) { - RCP > rcmVector = MueLu::Utilities::CuthillMcKee(*A); - auto localVector = rcmVector->getData(0); + RCP> rcmVector = MueLu::Utilities::CuthillMcKee(*A); + auto localVector = rcmVector->getData(0); for (LO i = 0; i < numRows; i++) localOrderingVector[i] = localVector[i]; } diff --git a/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp b/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp index 30f82f2b8b78..8aeac791865d 100644 --- a/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp +++ b/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp @@ -127,9 +127,9 @@ class UtilitiesBase { * @ret: vector containing max_{i\not=k}(-a_ik) */ - static Teuchos::ArrayRCP GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix& A); + static Teuchos::RCP> GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix& A); - static Teuchos::ArrayRCP GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix& A, const Xpetra::Vector& BlockNumber); + static Teuchos::RCP> GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix& A, const Xpetra::Vector& BlockNumber); /*! @brief Return vector containing inverse of input vector * diff --git a/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp b/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp index 690c329d1997..6e181415b09b 100644 --- a/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp +++ b/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp @@ -584,53 +584,84 @@ UtilitiesBase:: } template -Teuchos::ArrayRCP::magnitudeType> +Teuchos::RCP::magnitudeType, LocalOrdinal, GlobalOrdinal, Node>> UtilitiesBase:: GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix& A) { - size_t numRows = A.getRowMap()->getLocalNumElements(); - Magnitude ZERO = Teuchos::ScalarTraits::zero(); - Teuchos::ArrayRCP maxvec(numRows); - Teuchos::ArrayView cols; - Teuchos::ArrayView vals; - for (size_t i = 0; i < numRows; ++i) { - A.getLocalRowView(i, cols, vals); - Magnitude mymax = ZERO; - for (LocalOrdinal j = 0; j < cols.size(); ++j) { - if (Teuchos::as(cols[j]) != i) { - mymax = std::max(mymax, -Teuchos::ScalarTraits::real(vals[j])); - } - } - maxvec[i] = mymax; - } - return maxvec; + // Get/Create distributed objects + RCP rowMap = A.getRowMap(); + auto diag = Xpetra::VectorFactory::Build(rowMap, false); + + // Implement using Kokkos + using local_vector_type = typename Vector::dual_view_type::t_dev_um; + using local_matrix_type = typename Matrix::local_matrix_type; + using execution_space = typename local_vector_type::execution_space; + using values_type = typename local_matrix_type::values_type; + using scalar_type = typename values_type::non_const_value_type; + using mag_type = typename Kokkos::ArithTraits::mag_type; + using KAT_S = typename Kokkos::ArithTraits; + using KAT_M = typename Kokkos::ArithTraits; + using size_type = typename local_matrix_type::non_const_size_type; + + auto diag_dev = diag->getDeviceLocalView(Xpetra::Access::OverwriteAll); + auto local_mat_dev = A.getLocalMatrixDevice(); + Kokkos::RangePolicy my_policy(0, static_cast(diag_dev.extent(0))); + + Kokkos::parallel_for( + "GetMatrixMaxMinusOffDiagonal", my_policy, + KOKKOS_LAMBDA(const LocalOrdinal rowIdx) { + auto mymax = KAT_M::zero(); + auto row = local_mat_dev.row(rowIdx); + for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) { + if (rowIdx != row.colidx(entryIdx)) { + mymax = std::max(mymax, -KAT_S::magnitude(row.value(entryIdx))); + } + } + diag_dev(rowIdx, 0) = mymax; + }); + + return diag; } template -Teuchos::ArrayRCP::magnitudeType> +Teuchos::RCP::magnitudeType, LocalOrdinal, GlobalOrdinal, Node>> UtilitiesBase:: GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix& A, const Xpetra::Vector& BlockNumber) { TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "GetMatrixMaxMinusOffDiagonal: BlockNumber must match's A's column map."); - Teuchos::ArrayRCP block_id = BlockNumber.getData(0); + // Get/Create distributed objects + RCP rowMap = A.getRowMap(); + auto diag = Xpetra::VectorFactory::Build(rowMap, false); - size_t numRows = A.getRowMap()->getLocalNumElements(); - Magnitude ZERO = Teuchos::ScalarTraits::zero(); - Teuchos::ArrayRCP maxvec(numRows); - Teuchos::ArrayView cols; - Teuchos::ArrayView vals; - for (size_t i = 0; i < numRows; ++i) { - A.getLocalRowView(i, cols, vals); - Magnitude mymax = ZERO; - for (LocalOrdinal j = 0; j < cols.size(); ++j) { - if (Teuchos::as(cols[j]) != i && block_id[i] == block_id[cols[j]]) { - mymax = std::max(mymax, -Teuchos::ScalarTraits::real(vals[j])); - } - } - // printf("A(%d,:) row_scale(block) = %6.4e\n",(int)i,mymax); + // Implement using Kokkos + using local_vector_type = typename Vector::dual_view_type::t_dev_um; + using local_matrix_type = typename Matrix::local_matrix_type; + using execution_space = typename local_vector_type::execution_space; + using values_type = typename local_matrix_type::values_type; + using scalar_type = typename values_type::non_const_value_type; + using mag_type = typename Kokkos::ArithTraits::mag_type; + using KAT_S = typename Kokkos::ArithTraits; + using KAT_M = typename Kokkos::ArithTraits; + using size_type = typename local_matrix_type::non_const_size_type; - maxvec[i] = mymax; - } - return maxvec; + auto diag_dev = diag->getDeviceLocalView(Xpetra::Access::OverwriteAll); + auto local_mat_dev = A.getLocalMatrixDevice(); + auto local_block_dev = BlockNumber.getDeviceLocalView(Xpetra::Access::ReadOnly); + Kokkos::RangePolicy my_policy(0, static_cast(diag_dev.extent(0))); + + Kokkos::parallel_for( + "GetMatrixMaxMinusOffDiagonal", my_policy, + KOKKOS_LAMBDA(const LocalOrdinal rowIdx) { + auto mymax = KAT_M::zero(); + auto row = local_mat_dev.row(rowIdx); + for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) { + if ((rowIdx != row.colidx(entryIdx)) && (local_block_dev(rowIdx, 0) == local_block_dev(row.colidx(entryIdx), 0))) { + mymax = std::max(mymax, -KAT_S::magnitude(row.value(entryIdx))); + } + } + diag_dev(rowIdx, 0) = mymax; + }); + + return diag; } template diff --git a/packages/muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp b/packages/muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp index 734b9927d92c..ad40c6f8e821 100644 --- a/packages/muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp +++ b/packages/muelu/test/unit_tests_kokkos/CoalesceDropFactory_kokkos.cpp @@ -13,6 +13,8 @@ #include "MueLu_TestHelpers_kokkos.hpp" #include "MueLu_Version.hpp" +#include "MueLu_CoalesceDropFactory.hpp" +#include "MueLu_FilteredAFactory.hpp" #include "MueLu_CoalesceDropFactory_kokkos.hpp" #include "MueLu_AmalgamationFactory.hpp" #include "MueLu_LWGraph_kokkos.hpp" @@ -37,7 +39,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicScalarWitho MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, NO); out << "version: " << MueLu::Version() << std::endl; - RCP > comm = Parameters::getDefaultComm(); + RCP> comm = Parameters::getDefaultComm(); Level fineLevel; TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); @@ -54,7 +56,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicScalarWitho dropFact.Build(fineLevel); - auto graph = fineLevel.Get >("Graph", &dropFact); + auto graph = fineLevel.Get>("Graph", &dropFact); auto myDofsPerNode = fineLevel.Get("DofsPerNode", &dropFact); TEST_EQUALITY(as(myDofsPerNode) == 1, true); @@ -108,8 +110,8 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicScalarWithF MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, NO); out << "version: " << MueLu::Version() << std::endl; - RCP > comm = Parameters::getDefaultComm(); - Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); + RCP> comm = Parameters::getDefaultComm(); + Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); Level fineLevel; TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); @@ -130,7 +132,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicScalarWithF dropFact.Build(fineLevel); - auto graph = fineLevel.Get >("Graph", &dropFact); + auto graph = fineLevel.Get>("Graph", &dropFact); auto myDofsPerNode = fineLevel.Get("DofsPerNode", &dropFact); TEST_EQUALITY(as(myDofsPerNode) == 1, true); @@ -187,8 +189,8 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicBlockWithou MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, NO); out << "version: " << MueLu::Version() << std::endl; - RCP > comm = Parameters::getDefaultComm(); - Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); + RCP> comm = Parameters::getDefaultComm(); + Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); Level fineLevel; TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); @@ -209,7 +211,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicBlockWithou dropFact.Build(fineLevel); - auto graph = fineLevel.Get >("Graph", &dropFact); + auto graph = fineLevel.Get>("Graph", &dropFact); auto myDofsPerNode = fineLevel.Get("DofsPerNode", &dropFact); TEST_EQUALITY(as(myDofsPerNode) == blockSize, true); @@ -280,8 +282,8 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicBlockWithFi MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, NO); out << "version: " << MueLu::Version() << std::endl; - RCP > comm = Parameters::getDefaultComm(); - Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); + RCP> comm = Parameters::getDefaultComm(); + Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); Level fineLevel; TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); @@ -302,7 +304,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, ClassicBlockWithFi dropFact.Build(fineLevel); - auto graph = fineLevel.Get >("Graph", &dropFact); + auto graph = fineLevel.Get>("Graph", &dropFact); auto myDofsPerNode = fineLevel.Get("DofsPerNode", &dropFact); TEST_EQUALITY(as(myDofsPerNode) == 3, true); @@ -715,8 +717,8 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, AggresiveDroppingI MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); out << "version: " << MueLu::Version() << std::endl; - RCP > comm = Parameters::getDefaultComm(); - Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); + RCP> comm = Parameters::getDefaultComm(); + Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); RCP dofMap = Xpetra::MapFactory::Build(lib, 12 * comm->getSize(), 0, comm); Teuchos::RCP mtx = TestHelpers_kokkos::TestFactory::BuildTridiag(dofMap, 2.0, -1.0, -1.0); @@ -737,7 +739,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, AggresiveDroppingI dropFact.Build(fineLevel); - RCP graph = fineLevel.Get >("Graph", &dropFact); + RCP graph = fineLevel.Get>("Graph", &dropFact); auto boundaryNodes = graph->GetBoundaryNodeMap(); auto boundaryNodesHost = Kokkos::create_mirror_view(boundaryNodes); @@ -800,12 +802,218 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, AggresiveDroppingI } // AggresiveDroppingIsMarkedAsBoundary -#define MUELU_ETI_GROUP(SC, LO, GO, NO) \ - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, Constructor, SC, LO, GO, NO) \ - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicScalarWithoutFiltering, SC, LO, GO, NO) \ - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicScalarWithFiltering, SC, LO, GO, NO) \ - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicBlockWithoutFiltering, SC, LO, GO, NO) \ - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, AggresiveDroppingIsMarkedAsBoundary, SC, LO, GO, NO) +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory_kokkos, 2x2, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + out << "version: " << MueLu::Version() << std::endl; + + RCP> comm = Parameters::getDefaultComm(); + Xpetra::UnderlyingLib lib = TestHelpers_kokkos::Parameters::getLib(); + + // Set up a 2x2 matrix. + Teuchos::RCP mtx = TestHelpers_kokkos::TestFactory::build2x2(lib, + 2.0, -1.0, + -1.5, 2.0); + mtx->SetFixedBlockSize(1); + + using magnitudeType = typename Teuchos::ScalarTraits::magnitudeType; + Teuchos::RCP> coords; + coords = Xpetra::MultiVectorFactory::Build(mtx->getRowMap(), 1); + { + auto lclCoords = coords->getHostLocalView(Xpetra::Access::OverwriteAll); + auto rank = comm->getRank(); + lclCoords(0, 0) = 2 * rank; + lclCoords(1, 0) = 2 * rank + 1; + } + + using TF = TestHelpers_kokkos::TestFactory; + using local_matrix_type = typename Matrix::local_matrix_type::HostMirror; + + std::vector params; + std::vector expectedFilteredMatrices; + std::vector> expectedBoundaryNodesVector; + + for (bool reuseGraph : {false, true}) { + // test case 0 + Teuchos::ParameterList params0 = Teuchos::ParameterList(); + params0.set("aggregation: Dirichlet threshold", 0.); + // dropFact.SetParameter("aggregation: row sum drop tol", Teuchos::ParameterEntry(0.2)); + + params0.set("aggregation: drop scheme", "classical"); + params0.set("aggregation: use ml scaling of drop tol", false); + params0.set("aggregation: drop tol", 0.); + params0.set("aggregation: dropping may create Dirichlet", false); + params0.set("filtered matrix: reuse graph", reuseGraph); + params0.set("filtered matrix: use lumping", false); + params.push_back(params0); + expectedFilteredMatrices.push_back(TF::buildLocal2x2Host(2.0, -1.0, + -1.5, 2.0, reuseGraph)); + expectedBoundaryNodesVector.push_back({false, false}); + + // test case 1 + Teuchos::ParameterList params1 = Teuchos::ParameterList(params0); + params1.set("aggregation: Dirichlet threshold", 0.2); + params.push_back(params1); + expectedFilteredMatrices.push_back(TF::buildLocal2x2Host(2.0, -1.0, + -1.5, 2.0, reuseGraph)); + expectedBoundaryNodesVector.push_back({false, false}); + + // test case 2 + Teuchos::ParameterList params2 = Teuchos::ParameterList(params0); + params2.set("aggregation: Dirichlet threshold", 1.1); + params.push_back(params2); + expectedFilteredMatrices.push_back(TF::buildLocal2x2Host(2.0, -1.0, + -1.5, 2.0, reuseGraph)); + expectedBoundaryNodesVector.push_back({true, false}); + + // test case 3 + Teuchos::ParameterList params3 = Teuchos::ParameterList(params0); + params3.set("aggregation: drop tol", 0.51); + params.push_back(params3); + expectedFilteredMatrices.push_back(TF::buildLocal2x2Host(2.0, 0.0, + -1.5, 2.0, reuseGraph)); + expectedBoundaryNodesVector.push_back({false, false}); + + // test case 4 + Teuchos::ParameterList params4 = Teuchos::ParameterList(params0); + params4.set("aggregation: Dirichlet threshold", 1.1); + params4.set("aggregation: drop tol", 0.51); + params.push_back(params4); + expectedFilteredMatrices.push_back(TF::buildLocal2x2Host(2.0, 0.0, + -1.5, 2.0, reuseGraph)); + expectedBoundaryNodesVector.push_back({true, false}); + + // test case 5 + Teuchos::ParameterList params5 = Teuchos::ParameterList(params0); + params5.set("aggregation: Dirichlet threshold", 1.1); + params5.set("aggregation: dropping may create Dirichlet", true); + params5.set("aggregation: drop tol", 0.51); + params.push_back(params5); + expectedFilteredMatrices.push_back(TF::buildLocal2x2Host(2.0, 0.0, + -1.5, 2.0, reuseGraph)); + expectedBoundaryNodesVector.push_back({true, false}); + + // test case 6 + Teuchos::ParameterList params6 = Teuchos::ParameterList(params4); + params6.set("aggregation: Dirichlet threshold", 1.1); + params6.set("filtered matrix: use lumping", true); + params.push_back(params6); + expectedFilteredMatrices.push_back(TF::buildLocal2x2Host(1.0, 0.0, + -1.5, 2.0, reuseGraph)); + expectedBoundaryNodesVector.push_back({true, false}); + + // test case 7 + Teuchos::ParameterList params7 = Teuchos::ParameterList(params0); + params7.set("aggregation: drop scheme", "distance laplacian"); + params.push_back(params7); + expectedFilteredMatrices.push_back(TF::buildLocal2x2Host(2.0, -1.0, + -1.5, 2.0, reuseGraph)); + expectedBoundaryNodesVector.push_back({false, false}); + + // test case 8 + Teuchos::ParameterList params8 = Teuchos::ParameterList(params0); + params8.set("aggregation: drop scheme", "distance laplacian"); + params8.set("aggregation: drop tol", 1.01); + params.push_back(params8); + expectedFilteredMatrices.push_back(TF::buildLocal2x2Host(2.0, 0.0, + 0.0, 2.0, reuseGraph)); + expectedBoundaryNodesVector.push_back({true, true}); + } + + for (size_t testNo = 0; testNo < params.size(); ++testNo) { + out << "\n\nRunning test number " << testNo << std::endl + << std::endl; + + auto param = params[testNo]; + auto expectedFilteredMatrix = expectedFilteredMatrices[testNo]; + auto expectedBoundaryNodes = expectedBoundaryNodesVector[testNo]; + + for (bool useKokkos : {false, true}) { + RCP filteredA; + Kokkos::View boundaryNodes; + { + Level fineLevel; + TestHelpers_kokkos::TestFactory::createSingleLevelHierarchy(fineLevel); + fineLevel.Set("A", mtx); + fineLevel.Set("Coordinates", coords); + + RCP dropFact; + RCP filteredAFact; + if (useKokkos) { + out << "\nKokkos code path\nparams:\n" + << param << "\n"; + dropFact = rcp(new CoalesceDropFactory_kokkos()); + dropFact->SetParameterList(param); + filteredAFact = dropFact; + } else { + out << "\nNon-Kokkos code path\nparams:\n" + << param << "\n"; + dropFact = rcp(new CoalesceDropFactory()); + filteredAFact = rcp(new FilteredAFactory()); + auto paramCopy = Teuchos::ParameterList(param); + auto paramFilteredA = Teuchos::ParameterList(); + paramFilteredA.set("filtered matrix: use lumping", paramCopy.get("filtered matrix: use lumping")); + paramCopy.remove("filtered matrix: use lumping"); + paramFilteredA.set("filtered matrix: reuse graph", paramCopy.get("filtered matrix: reuse graph")); + paramCopy.remove("filtered matrix: reuse graph"); + paramFilteredA.set("filtered matrix: Dirichlet threshold", paramCopy.get("aggregation: Dirichlet threshold")); + dropFact->SetParameterList(paramCopy); + filteredAFact->SetParameterList(paramFilteredA); + filteredAFact->SetFactory("Graph", dropFact); + filteredAFact->SetFactory("Filtering", dropFact); + } + fineLevel.Request("A", filteredAFact.get()); + fineLevel.Request("Graph", dropFact.get()); + fineLevel.Request("DofsPerNode", dropFact.get()); + dropFact->Build(fineLevel); + + filteredA = fineLevel.Get>("A", filteredAFact.get()); + + if (useKokkos) { + auto graph = fineLevel.Get>("Graph", dropFact.get()); + auto boundaryNodes_d = graph->GetBoundaryNodeMap(); + boundaryNodes = Kokkos::create_mirror_view(boundaryNodes_d); + Kokkos::deep_copy(boundaryNodes, boundaryNodes_d); + } else { + auto graph = fineLevel.Get>("Graph", dropFact.get()); + boundaryNodes = graph->GetBoundaryNodeMap(); + } + } + + auto lclFilteredA = filteredA->getLocalMatrixHost(); + auto lclExpectedFilteredA = expectedFilteredMatrix; + + out << "Filtered A:\n" + << TF::localMatToString(lclFilteredA); + + TEST_EQUALITY(lclFilteredA.graph.row_map.extent(0), lclExpectedFilteredA.graph.row_map.extent(0)); + for (size_t row = 0; row < std::min(lclFilteredA.graph.row_map.extent(0), lclExpectedFilteredA.graph.row_map.extent(0)); ++row) + TEST_EQUALITY(lclFilteredA.graph.row_map(row), lclExpectedFilteredA.graph.row_map(row)); + + TEST_EQUALITY(lclFilteredA.graph.entries.extent(0), lclExpectedFilteredA.graph.entries.extent(0)); + for (size_t entry = 0; entry < std::min(lclFilteredA.graph.entries.extent(0), lclExpectedFilteredA.graph.entries.extent(0)); ++entry) { + TEST_EQUALITY(lclFilteredA.graph.entries(entry), lclExpectedFilteredA.graph.entries(entry)); + TEST_EQUALITY(lclFilteredA.values(entry), lclExpectedFilteredA.values(entry)); + } + + auto boundaryNodes_h = Kokkos::create_mirror_view(boundaryNodes); + Kokkos::deep_copy(boundaryNodes_h, boundaryNodes); + TEST_EQUALITY(boundaryNodes_h.extent(0), expectedBoundaryNodes.size()); + for (size_t row = 0; row < std::min(boundaryNodes_h.extent(0), expectedBoundaryNodes.size()); ++row) + TEST_EQUALITY(boundaryNodes_h(row), expectedBoundaryNodes[row]); + } + out << "Done with test number " << testNo << std::endl; + } +} + +#define MUELU_ETI_GROUP(SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, Constructor, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicScalarWithoutFiltering, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicScalarWithFiltering, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicBlockWithoutFiltering, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, AggresiveDroppingIsMarkedAsBoundary, SC, LO, GO, NO) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, 2x2, SC, LO, GO, NO) // TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(CoalesceDropFactory_kokkos, ClassicBlockWithFiltering, SC, LO, GO, NO) // not implemented yet diff --git a/packages/muelu/test/unit_tests_kokkos/MueLu_TestHelpers_kokkos.hpp b/packages/muelu/test/unit_tests_kokkos/MueLu_TestHelpers_kokkos.hpp index cb8ecf40479d..ff400f9ca3cb 100644 --- a/packages/muelu/test/unit_tests_kokkos/MueLu_TestHelpers_kokkos.hpp +++ b/packages/muelu/test/unit_tests_kokkos/MueLu_TestHelpers_kokkos.hpp @@ -149,6 +149,150 @@ class TestFactory { return Op; } + static typename Matrix::local_matrix_type::HostMirror buildLocal2x2Host(Scalar a00, Scalar a01, Scalar a10, Scalar a11, const bool keepZeros) { + using local_matrix_type = typename Matrix::local_matrix_type::HostMirror; + using local_graph_type = typename CrsGraph::local_graph_type::HostMirror; + using rowptr_type = typename local_graph_type::row_map_type::non_const_type; + using entries_type = typename local_graph_type::entries_type::non_const_type; + using values_type = typename local_matrix_type::values_type::non_const_type; + + using TST = Teuchos::ScalarTraits; + size_t nnz; + if (keepZeros) + nnz = 4; + else + nnz = (TST::magnitude(a00) > TST::eps()) + (TST::magnitude(a01) > TST::eps()) + (TST::magnitude(a10) > TST::eps()) + (TST::magnitude(a11) > TST::eps()); + + auto rowptr = rowptr_type("rowptr", 3); + auto entries = entries_type("entries", nnz); + auto values = values_type("entries", nnz); + { + auto rowptr_h = Kokkos::create_mirror_view(rowptr); + auto entries_h = Kokkos::create_mirror_view(entries); + auto values_h = Kokkos::create_mirror_view(values); + + size_t k = 0; + + rowptr_h(0) = k; + if (keepZeros || TST::magnitude(a00) > TST::eps()) { + entries_h(k) = 0; + values_h(k) = a00; + ++k; + } + if (keepZeros || TST::magnitude(a01) > TST::eps()) { + entries_h(k) = 1; + values_h(k) = a01; + ++k; + } + + rowptr_h(1) = k; + if (keepZeros || TST::magnitude(a10) > TST::eps()) { + entries_h(k) = 0; + values_h(k) = a10; + ++k; + } + + if (keepZeros || TST::magnitude(a11) > TST::eps()) { + entries_h(k) = 1; + values_h(k) = a11; + ++k; + } + rowptr_h(2) = k; + + Kokkos::deep_copy(rowptr, rowptr_h); + Kokkos::deep_copy(entries, entries_h); + Kokkos::deep_copy(values, values_h); + } + auto lclA = local_matrix_type("A", 2, 2, nnz, values, rowptr, entries); + return lclA; + } + + static std::string localMatToString(typename Matrix::local_matrix_type::HostMirror& mat) { + std::stringstream s; + typename Matrix::local_ordinal_type numCols = mat.numCols(); + for (typename Matrix::local_ordinal_type row_id = 0; row_id < mat.numRows(); ++row_id) { + auto row = mat.row(row_id); + for (typename Matrix::local_ordinal_type col_id = 0; col_id < numCols; ++col_id) { + bool found = false; + for (typename Matrix::local_ordinal_type colPtr = 0; colPtr < row.length; ++colPtr) { + if (row.colidx(colPtr) == col_id) { + s << row.value(colPtr) << " "; + found = true; + } + } + if (not found) + s << 0.0 << " "; + } + s << "\n"; + } + return s.str(); + } + + static typename Matrix::local_matrix_type buildLocal2x2(Scalar a00, Scalar a01, Scalar a10, Scalar a11) { + using local_matrix_type = typename Matrix::local_matrix_type; + using local_graph_type = typename CrsGraph::local_graph_type; + using rowptr_type = typename local_graph_type::row_map_type::non_const_type; + using entries_type = typename local_graph_type::entries_type::non_const_type; + using values_type = typename local_matrix_type::values_type::non_const_type; + + using TST = Teuchos::ScalarTraits; + size_t nnz = (TST::magnitude(a00) > TST::eps()) + (TST::magnitude(a01) > TST::eps()) + (TST::magnitude(a10) > TST::eps()) + (TST::magnitude(a11) > TST::eps()); + + auto rowptr = rowptr_type("rowptr", 3); + auto entries = entries_type("entries", nnz); + auto values = values_type("entries", nnz); + { + auto rowptr_h = Kokkos::create_mirror_view(rowptr); + auto entries_h = Kokkos::create_mirror_view(entries); + auto values_h = Kokkos::create_mirror_view(values); + + size_t k = 0; + + rowptr_h(0) = k; + if (TST::magnitude(a00) > TST::eps()) { + entries_h(k) = 0; + values_h(k) = a00; + ++k; + } + if (TST::magnitude(a01) > TST::eps()) { + entries_h(k) = 1; + values_h(k) = a01; + ++k; + } + + rowptr_h(1) = k; + if (TST::magnitude(a10) > TST::eps()) { + entries_h(k) = 0; + values_h(k) = a10; + ++k; + } + + if (TST::magnitude(a11) > TST::eps()) { + entries_h(k) = 1; + values_h(k) = a11; + ++k; + } + rowptr_h(2) = k; + + Kokkos::deep_copy(rowptr, rowptr_h); + Kokkos::deep_copy(entries, entries_h); + Kokkos::deep_copy(values, values_h); + } + auto lclA = local_matrix_type("A", 2, 2, nnz, values, rowptr, entries); + return lclA; + } + + static RCP build2x2(Xpetra::UnderlyingLib lib, Scalar a00, Scalar a01, Scalar a10, Scalar a11) { + auto lclA = buildLocal2x2(a00, a01, a10, a11); + + RCP > comm = TestHelpers_kokkos::Parameters::getDefaultComm(); + if (lib == Xpetra::NotSpecified) + lib = TestHelpers_kokkos::Parameters::getLib(); + RCP map = MapFactory::Build(lib, 2 * comm->getSize(), 0, comm); + + return MatrixFactory::Build(lclA, map, map); + } + // Create a 1D Poisson matrix with the specified number of rows // nx: global number of rows static RCP Build1DPoisson(GO nx, Xpetra::UnderlyingLib lib = Xpetra::NotSpecified) { // global_size_t