From 346f9ab299eef694058d8e687fea4805917319be Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Fri, 22 May 2020 16:21:07 -0600 Subject: [PATCH 1/2] MueLu Utilities: Add Kokkos version of ApplyOAZToMatrixRows --- .../src/Utils/MueLu_UtilitiesBase_decl.hpp | 23 +++++--- .../src/Utils/MueLu_Utilities_kokkos_decl.hpp | 4 ++ .../src/Utils/MueLu_Utilities_kokkos_def.hpp | 58 +++++++++++++++++++ 3 files changed, 78 insertions(+), 7 deletions(-) diff --git a/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp b/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp index 5bb15ec566b5..79683bb04759 100644 --- a/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp +++ b/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp @@ -852,7 +852,7 @@ namespace MueLu { // Takes a vector of row indices static void ApplyOAZToMatrixRows(Teuchos::RCP >& A, const std::vector& dirichletRows) { - RCP Rmap = A->getColMap(); + RCP Rmap = A->getRowMap(); RCP Cmap = A->getColMap(); Scalar one =Teuchos::ScalarTraits::one(); Scalar zero =Teuchos::ScalarTraits::zero(); @@ -878,11 +878,15 @@ namespace MueLu { // Takes a Boolean array. static void ApplyOAZToMatrixRows(Teuchos::RCP >& A, const Teuchos::ArrayRCP& dirichletRows) { - RCP Rmap = A->getColMap(); + TEUCHOS_ASSERT(A->isFillComplete()); + RCP domMap = A->getDomainMap(); + RCP ranMap = A->getRangeMap(); + RCP Rmap = A->getRowMap(); RCP Cmap = A->getColMap(); - Scalar one =Teuchos::ScalarTraits::one(); - Scalar zero =Teuchos::ScalarTraits::zero(); - + TEUCHOS_ASSERT(static_cast(dirichletRows.size()) == Rmap->getNodeNumElements()); + const Scalar one = Teuchos::ScalarTraits::one(); + const Scalar zero = Teuchos::ScalarTraits::zero(); + A->resumeFill(); for(size_t i=0; i<(size_t) dirichletRows.size(); i++) { if (dirichletRows[i]){ GlobalOrdinal row_gid = Rmap->getGlobalElement(i); @@ -890,16 +894,18 @@ namespace MueLu { Teuchos::ArrayView indices; Teuchos::ArrayView values; A->getLocalRowView(i,indices,values); - // NOTE: This won't work with fancy node types. - Scalar* valuesNC = const_cast(values.getRawPtr()); + + Teuchos::ArrayRCP valuesNC(values.size()); for(size_t j=0; j<(size_t)indices.size(); j++) { if(Cmap->getGlobalElement(indices[j])==row_gid) valuesNC[j]=one; else valuesNC[j]=zero; } + A->replaceLocalValues(i,indices,valuesNC()); } } + A->fillComplete(domMap, ranMap); } // Zeros out rows @@ -923,6 +929,7 @@ namespace MueLu { static void ZeroDirichletRows(Teuchos::RCP >& A, const Teuchos::ArrayRCP& dirichletRows, Scalar replaceWith=Teuchos::ScalarTraits::zero()) { + TEUCHOS_ASSERT(static_cast(dirichletRows.size()) == A->getRowMap()->getNodeNumElements()); for(size_t i=0; i<(size_t) dirichletRows.size(); i++) { if (dirichletRows[i]) { Teuchos::ArrayView indices; @@ -941,6 +948,7 @@ namespace MueLu { static void ZeroDirichletRows(Teuchos::RCP >& X, const Teuchos::ArrayRCP& dirichletRows, Scalar replaceWith=Teuchos::ScalarTraits::zero()) { + TEUCHOS_ASSERT(static_cast(dirichletRows.size()) == X->getMap()->getNodeNumElements()); for(size_t i=0; i<(size_t) dirichletRows.size(); i++) { if (dirichletRows[i]) { for(size_t j=0; jgetNumVectors(); j++) @@ -954,6 +962,7 @@ namespace MueLu { static void ZeroDirichletCols(Teuchos::RCP& A, const Teuchos::ArrayRCP& dirichletCols, Scalar replaceWith=Teuchos::ScalarTraits::zero()) { + TEUCHOS_ASSERT(static_cast(dirichletCols.size()) == A->getColMap()->getNodeNumElements()); for(size_t i=0; igetNodeNumRows(); i++) { Teuchos::ArrayView indices; Teuchos::ArrayView values; diff --git a/packages/muelu/src/Utils/MueLu_Utilities_kokkos_decl.hpp b/packages/muelu/src/Utils/MueLu_Utilities_kokkos_decl.hpp index bf7ee2dec152..5fc84d4848aa 100644 --- a/packages/muelu/src/Utils/MueLu_Utilities_kokkos_decl.hpp +++ b/packages/muelu/src/Utils/MueLu_Utilities_kokkos_decl.hpp @@ -304,6 +304,8 @@ namespace MueLu { */ static RCP > CuthillMcKee(const Matrix &Op); + static void ApplyOAZToMatrixRows(RCP& A, const Kokkos::View& dirichletRows); + }; // class Utils @@ -716,6 +718,8 @@ namespace MueLu { */ static RCP > CuthillMcKee(const Matrix &Op); + static void ApplyOAZToMatrixRows(RCP& A, const Kokkos::View& dirichletRows); + }; // class Utilities (specialization SC=double LO=GO=int) diff --git a/packages/muelu/src/Utils/MueLu_Utilities_kokkos_def.hpp b/packages/muelu/src/Utils/MueLu_Utilities_kokkos_def.hpp index a39e17cc030f..8144de4b0ff2 100644 --- a/packages/muelu/src/Utils/MueLu_Utilities_kokkos_def.hpp +++ b/packages/muelu/src/Utils/MueLu_Utilities_kokkos_def.hpp @@ -729,6 +729,64 @@ namespace MueLu { return MueLu::CuthillMcKee(Op); } + // Applies Ones-and-Zeros to matrix rows + // Takes a Boolean array. + template + void + ApplyOAZToMatrixRows(Teuchos::RCP >& A, + const Kokkos::View& dirichletRows) { + TEUCHOS_ASSERT(A->isFillComplete()); + using ATS = Kokkos::ArithTraits; + using impl_ATS = Kokkos::ArithTraits; + using range_type = Kokkos::RangePolicy; + + RCP > domMap = A->getDomainMap(); + RCP > ranMap = A->getRangeMap(); + RCP > Rmap = A->getRowMap(); + RCP > Cmap = A->getColMap(); + + TEUCHOS_ASSERT(static_cast(dirichletRows.size()) == Rmap->getNodeNumElements()); + + const Scalar one = impl_ATS::one(); + const Scalar zero = impl_ATS::zero(); + + auto localMatrix = A->getLocalMatrix(); + auto localRmap = Rmap->getLocalMap(); + auto localCmap = Cmap->getLocalMap(); + + Kokkos::parallel_for("MueLu::Utils::ApplyOAZ",range_type(0,dirichletRows.extent(0)), + KOKKOS_LAMBDA(const LocalOrdinal row) { + if (dirichletRows(row)){ + auto rowView = localMatrix.row(row); + auto length = rowView.length; + auto row_gid = localRmap.getGlobalElement(row); + auto row_lid = localCmap.getLocalElement(row_gid); + + for (decltype(length) colID = 0; colID < length; colID++) + if (rowView.colidx(colID) == row_lid) + rowView.value(colID) = one; + else + rowView.value(colID) = zero; + } + }); + } + + template + void + Utilities_kokkos:: + ApplyOAZToMatrixRows(Teuchos::RCP >& A, + const Kokkos::View& dirichletRows) { + MueLu::ApplyOAZToMatrixRows(A, dirichletRows); + } + + template + void + Utilities_kokkos:: + ApplyOAZToMatrixRows(Teuchos::RCP >& A, + const Kokkos::View& dirichletRows) { + MueLu::ApplyOAZToMatrixRows(A, dirichletRows); + } + } //namespace MueLu #define MUELU_UTILITIES_KOKKOS_SHORT From 5893de8486038af53b6205aa0aadc13fb6b83384 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Fri, 22 May 2020 16:22:24 -0600 Subject: [PATCH 2/2] MueLu RefMaxwell: Apply BCs to A_nodal --- .../adapters/xpetra/MueLu_RefMaxwell_decl.hpp | 17 +- .../adapters/xpetra/MueLu_RefMaxwell_def.hpp | 246 +++++++++++++----- 2 files changed, 194 insertions(+), 69 deletions(-) diff --git a/packages/muelu/adapters/xpetra/MueLu_RefMaxwell_decl.hpp b/packages/muelu/adapters/xpetra/MueLu_RefMaxwell_decl.hpp index 3a22591f61cd..98a53a002a16 100644 --- a/packages/muelu/adapters/xpetra/MueLu_RefMaxwell_decl.hpp +++ b/packages/muelu/adapters/xpetra/MueLu_RefMaxwell_decl.hpp @@ -389,6 +389,15 @@ namespace MueLu { //! dump out real-valued multivector void dumpCoords(const RealValuedMultiVector& X, std::string name) const; + //! dump out boolean ArrayView + void dump(const Teuchos::ArrayRCP& v, std::string name) const; + +#ifdef HAVE_MUELU_KOKKOS_REFACTOR + //! dump out boolean Kokkos::View + void dump(const Kokkos::View& v, std::string name) const; +#endif + + //! get a (synced) timer Teuchos::RCP getTimer(std::string name, RCP > comm=Teuchos::null) const; //! set parameters @@ -413,12 +422,10 @@ namespace MueLu { Teuchos::RCP A_nodal_Matrix_, P11_, R11_, AH_, A22_, Addon_Matrix_; //! Vectors for BCs #ifdef HAVE_MUELU_KOKKOS_REFACTOR - Kokkos::View BCrowsKokkos_; - Kokkos::View BCcolsKokkos_; + Kokkos::View BCrowsKokkos_, BCcolsKokkos_, BCdomainKokkos_; #endif - int BCrowcount_, BCcolcount_; - Teuchos::ArrayRCP BCrows_; - Teuchos::ArrayRCP BCcols_; + int BCedges_, BCnodes_; + Teuchos::ArrayRCP BCrows_, BCcols_, BCdomain_; //! Nullspace Teuchos::RCP Nullspace_; //! Coordinates diff --git a/packages/muelu/adapters/xpetra/MueLu_RefMaxwell_def.hpp b/packages/muelu/adapters/xpetra/MueLu_RefMaxwell_def.hpp index 7faeb69df280..161c0a7935af 100644 --- a/packages/muelu/adapters/xpetra/MueLu_RefMaxwell_def.hpp +++ b/packages/muelu/adapters/xpetra/MueLu_RefMaxwell_def.hpp @@ -105,6 +105,120 @@ namespace MueLu { + template + void FindNonZeros(const Teuchos::ArrayRCP vals, + Teuchos::ArrayRCP nonzeros) { + TEUCHOS_ASSERT(vals.size() == nonzeros.size()); + typedef typename Teuchos::ScalarTraits::magnitudeType magnitudeType; + const magnitudeType eps = 2.0*Teuchos::ScalarTraits::eps(); + for(size_t i=0; i(vals.size()); i++) { + nonzeros[i] = (Teuchos::ScalarTraits::magnitude(vals[i]) > eps); + } + } + + + template + void DetectDirichletCols(const Xpetra::Matrix& A, + const Teuchos::ArrayRCP& dirichletRows, + Teuchos::ArrayRCP dirichletCols, + Teuchos::ArrayRCP dirichletDomain) { + const Scalar one = Teuchos::ScalarTraits::one(); + RCP > domMap = A.getDomainMap(); + RCP > rowMap = A.getRowMap(); + RCP > colMap = A.getColMap(); + TEUCHOS_ASSERT(static_cast(dirichletRows.size()) == rowMap->getNodeNumElements()); + TEUCHOS_ASSERT(static_cast(dirichletCols.size()) == colMap->getNodeNumElements()); + TEUCHOS_ASSERT(static_cast(dirichletDomain.size()) == domMap->getNodeNumElements()); + RCP > myColsToZero = Xpetra::MultiVectorFactory::Build(colMap, 1, /*zeroOut=*/true); + // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0 + for(size_t i=0; i<(size_t) dirichletRows.size(); i++) { + if (dirichletRows[i]) { + ArrayView indices; + ArrayView values; + A.getLocalRowView(i,indices,values); + for(size_t j=0; j(indices.size()); j++) + myColsToZero->replaceLocalValue(indices[j],0,one); + } + } + + RCP > globalColsToZero; + RCP > importer = A.getCrsGraph()->getImporter(); + if (!importer.is_null()) { + globalColsToZero = Xpetra::MultiVectorFactory::Build(domMap, 1, /*zeroOut=*/true); + // export to domain map + globalColsToZero->doExport(*myColsToZero,*importer,Xpetra::ADD); + // import to column map + myColsToZero->doImport(*globalColsToZero,*importer,Xpetra::INSERT); + } + else + globalColsToZero = myColsToZero; + + FindNonZeros(globalColsToZero->getData(0),dirichletDomain); + FindNonZeros(myColsToZero->getData(0),dirichletCols); + } + + +#ifdef HAVE_MUELU_KOKKOS_REFACTOR + + template + void FindNonZeros(const typename Xpetra::MultiVector::dual_view_type::t_dev_um vals, + Kokkos::View nonzeros) { + using ATS = Kokkos::ArithTraits; + using range_type = Kokkos::RangePolicy; + TEUCHOS_ASSERT(vals.extent(0) == nonzeros.extent(0)); + const typename ATS::magnitudeType eps = 2.0*ATS::eps(); + + Kokkos::parallel_for("MueLu:RefMaxwell::FindNonZeros", range_type(0,vals.extent(0)), + KOKKOS_LAMBDA (const size_t i) { + nonzeros(i) = (ATS::magnitude(vals(i,0)) > eps); + }); + } + + template + void DetectDirichletCols(const Xpetra::Matrix& A, + const Kokkos::View & dirichletRows, + Kokkos::View dirichletCols, + Kokkos::View dirichletDomain) { + using range_type = Kokkos::RangePolicy; + const Scalar one = Teuchos::ScalarTraits::one(); + RCP > domMap = A.getDomainMap(); + RCP > rowMap = A.getRowMap(); + RCP > colMap = A.getColMap(); + TEUCHOS_ASSERT(dirichletRows.extent(0) == rowMap->getNodeNumElements()); + TEUCHOS_ASSERT(dirichletCols.extent(0) == colMap->getNodeNumElements()); + TEUCHOS_ASSERT(dirichletDomain.extent(0) == domMap->getNodeNumElements()); + RCP > myColsToZero = Xpetra::VectorFactory::Build(colMap, /*zeroOut=*/true); + // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0 + auto myColsToZeroView = myColsToZero->template getLocalView(); + auto localMatrix = A.getLocalMatrix(); + Kokkos::parallel_for("MueLu:RefMaxwell::DetectDirichletCols", range_type(0,rowMap->getNodeNumElements()), + KOKKOS_LAMBDA(const LocalOrdinal row) { + if (dirichletRows(row)) { + auto rowView = localMatrix.row(row); + auto length = rowView.length; + + for (decltype(length) colID = 0; colID < length; colID++) + myColsToZeroView(rowView.colidx(colID),0) = one; + } + }); + + RCP > globalColsToZero; + RCP > importer = A.getCrsGraph()->getImporter(); + if (!importer.is_null()) { + globalColsToZero = Xpetra::VectorFactory::Build(domMap, /*zeroOut=*/true); + // export to domain map + globalColsToZero->doExport(*myColsToZero,*importer,Xpetra::ADD); + // import to column map + myColsToZero->doImport(*globalColsToZero,*importer,Xpetra::INSERT); + } + else + globalColsToZero = myColsToZero; + FindNonZeros(globalColsToZero->template getLocalView(),dirichletDomain); + FindNonZeros(myColsToZero->template getLocalView(),dirichletCols); + } + +#endif + template Teuchos::RCP > RefMaxwell::getDomainMap() const { return SM_Matrix_->getDomainMap(); @@ -303,6 +417,8 @@ namespace MueLu { // Find rows with only 1 or 2 nonzero entries, record them in BCrows_. // BCrows_[i] is true, iff i is a boundary row // BCcols_[i] is true, iff i is a boundary column + int BCedgesLocal = 0; + int BCnodesLocal = 0; #ifdef HAVE_MUELU_KOKKOS_REFACTOR if (useKokkos_) { BCrowsKokkos_ = Utilities_kokkos::DetectDirichletRows(*SM_Matrix_,Teuchos::ScalarTraits::eps(),/*count_twos_as_dirichlet=*/true); @@ -330,31 +446,22 @@ namespace MueLu { } } - BCcolsKokkos_ = Utilities_kokkos::DetectDirichletCols(*D0_Matrix_,BCrowsKokkos_); + BCcolsKokkos_ = Kokkos::View(Kokkos::ViewAllocateWithoutInitializing("dirichletCols"), D0_Matrix_->getColMap()->getNodeNumElements()); + BCdomainKokkos_ = Kokkos::View(Kokkos::ViewAllocateWithoutInitializing("dirichletCols"), D0_Matrix_->getDomainMap()->getNodeNumElements()); + DetectDirichletCols(*D0_Matrix_,BCrowsKokkos_,BCcolsKokkos_,BCdomainKokkos_); + + dump(BCrowsKokkos_, "BCrows.m"); + dump(BCcolsKokkos_, "BCcols.m"); + dump(BCdomainKokkos_, "BCdomain.m"); - int BCrowcountLocal = 0; for (size_t i = 0; igetRowMap()->getComm(), BCrowcountLocal, BCrowcount_); -#else - BCrowcount_ = BCrowcountLocal; -#endif - int BCcolcountLocal = 0; - for (size_t i = 0; igetRowMap()->getComm(), BCcolcountLocal, BCcolcount_); -#else - BCcolcount_ = BCcolcountLocal; -#endif - if (IsPrint(Statistics2)) { - GetOStream(Statistics2) << "MueLu::RefMaxwell::compute(): Detected " << BCrowcount_ << " BC rows and " << BCcolcount_ << " BC columns." << std::endl; - } + BCedgesLocal += 1; + for (size_t i = 0; i(Utilities::DetectDirichletRows(*SM_Matrix_,Teuchos::ScalarTraits::eps(),/*count_twos_as_dirichlet=*/true)); @@ -381,57 +488,36 @@ namespace MueLu { } } - BCcols_ = Utilities::DetectDirichletCols(*D0_Matrix_,BCrows_); - int BCrowcountLocal = 0; + BCcols_.resize(D0_Matrix_->getColMap()->getNodeNumElements()); + BCdomain_.resize(D0_Matrix_->getDomainMap()->getNodeNumElements()); + DetectDirichletCols(*D0_Matrix_,BCrows_,BCcols_,BCdomain_); + + dump(BCrows_, "BCrows.m"); + dump(BCcols_, "BCcols.m"); + dump(BCdomain_, "BCdomain.m"); + for (auto it = BCrows_.begin(); it != BCrows_.end(); ++it) if (*it) - BCrowcountLocal += 1; -#ifdef HAVE_MPI - MueLu_sumAll(SM_Matrix_->getRowMap()->getComm(), BCrowcountLocal, BCrowcount_); -#else - BCrowcount_ = BCrowcountLocal; -#endif - int BCcolcountLocal = 0; - for (auto it = BCcols_.begin(); it != BCcols_.end(); ++it) + BCedgesLocal += 1; + for (auto it = BCdomain_.begin(); it != BCdomain_.end(); ++it) if (*it) - BCcolcountLocal += 1; + BCnodesLocal += 1; + } + #ifdef HAVE_MPI - MueLu_sumAll(SM_Matrix_->getRowMap()->getComm(), BCcolcountLocal, BCcolcount_); + MueLu_sumAll(SM_Matrix_->getRowMap()->getComm(), BCedgesLocal, BCedges_); + MueLu_sumAll(SM_Matrix_->getRowMap()->getComm(), BCnodesLocal, BCnodes_); #else - BCcolcount_ = BCcolcountLocal; + BCedges_ = BCedgesLocal; + BCnodes_ = BCnodesLocal; #endif - if (IsPrint(Statistics2)) { - GetOStream(Statistics2) << "MueLu::RefMaxwell::compute(): Detected " << BCrowcount_ << " BC rows and " << BCcolcount_ << " BC columns." << std::endl; - } - } + if (IsPrint(Statistics2)) { + GetOStream(Statistics2) << "MueLu::RefMaxwell::compute(): Detected " << BCedges_ << " BC rows and " << BCnodes_ << " BC columns." << std::endl; + } - TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as(BCrowcount_) >= D0_Matrix_->getRangeMap()->getGlobalNumElements(), Exceptions::RuntimeError, + TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as(BCedges_) >= D0_Matrix_->getRangeMap()->getGlobalNumElements(), Exceptions::RuntimeError, "All edges are detected as boundary edges!"); - if (dump_matrices_) { - GetOStream(Runtime0) << "Dumping BCrows, BCcols" << std::endl; - std::ofstream outBCrows("BCrows.m"); - std::ofstream outBCcols("BCcols.m"); -#ifdef HAVE_MUELU_KOKKOS_REFACTOR - if (useKokkos_) { - auto BCrows = Kokkos::create_mirror_view (BCrowsKokkos_); - Kokkos::deep_copy(BCrows , BCrowsKokkos_); - for (size_t i = 0; i < BCrows.size(); i++) - outBCrows << BCrows[i] << "\n"; - - auto BCcols = Kokkos::create_mirror_view (BCcolsKokkos_); - Kokkos::deep_copy(BCcols , BCcolsKokkos_); - for (size_t i = 0; i < BCcols.size(); i++) - outBCcols << BCcols[i] << "\n"; - } else -#endif - { - for (size_t i = 0; i < Teuchos::as(BCrows_.size()); i++) - outBCrows << BCrows_[i] << "\n"; - for (size_t i = 0; i < Teuchos::as(BCcols_.size()); i++) - outBCcols << BCcols_[i] << "\n"; - } - } } //////////////////////////////////////////////////////////////////////////////// @@ -542,6 +628,14 @@ namespace MueLu { coarseLevel.Request("A", rapFact.get()); A_nodal_Matrix_ = coarseLevel.Get< RCP >("A", rapFact.get()); + + // Apply boundary conditions to A_nodal +#ifdef HAVE_MUELU_KOKKOS_REFACTOR + if (useKokkos_) + Utilities_kokkos::ApplyOAZToMatrixRows(A_nodal_Matrix_,BCdomainKokkos_); + else +#endif + Utilities::ApplyOAZToMatrixRows(A_nodal_Matrix_,BCdomain_); dump(*A_nodal_Matrix_, "A_nodal.m"); // build special prolongator @@ -997,7 +1091,7 @@ namespace MueLu { std::transform(coarseType.begin(), coarseType.end(), coarseType.begin(), ::tolower); std::transform(coarseType.begin(), ++coarseType.begin(), coarseType.begin(), ::toupper); } - if (BCrowcount_ == 0 && + if (BCedges_ == 0 && (coarseType == "" || coarseType == "Klu" || coarseType == "Klu2") && @@ -1258,6 +1352,30 @@ namespace MueLu { } + template + void RefMaxwell::dump(const Teuchos::ArrayRCP& v, std::string name) const { + if (dump_matrices_) { + GetOStream(Runtime0) << "Dumping to " << name << std::endl; + std::ofstream out(name); + for (size_t i = 0; i < Teuchos::as(v.size()); i++) + out << v[i] << "\n"; + } + } + +#ifdef HAVE_MUELU_KOKKOS_REFACTOR + template + void RefMaxwell::dump(const Kokkos::View& v, std::string name) const { + if (dump_matrices_) { + GetOStream(Runtime0) << "Dumping to " << name << std::endl; + std::ofstream out(name); + auto vH = Kokkos::create_mirror_view (v); + Kokkos::deep_copy(vH , v); + for (size_t i = 0; i < vH.size(); i++) + out << vH[i] << "\n"; + } + } +#endif + template Teuchos::RCP RefMaxwell::getTimer(std::string name, RCP > comm) const { if (IsPrint(Timings)) {