From 85a779b2bc83aa3fc6491065fd41d0a617444e19 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Tue, 21 Jun 2022 17:49:56 -0600 Subject: [PATCH] Ifpack2: AdditiveSchwarz matrix filtering in parallel Add a fast path ("Details::AdditiveSchwarzFilter") where LocalFilter, SingletonFilter and ReorderFilter are applied all at once on device. This benefits initialize, compute and apply, since the old path only supported host matrix access with getLocalRowCopy, and each filter pass did its own copy. The old path is still in place to support non-CrsMatrix inputs to AdditiveSchwarz. Also fix #9606 by having AdditiveSchwarz::compute re-import the halo rows of the OverlappingRowMatrix (ExtMatrix_). --- packages/ifpack2/src/CMakeLists.txt | 1 + .../src/Ifpack2_AdditiveSchwarz_decl.hpp | 9 +- .../src/Ifpack2_AdditiveSchwarz_def.hpp | 441 +++++++---- ...ck2_Details_AdditiveSchwarzFilter_decl.hpp | 412 ++++++++++ ...ack2_Details_AdditiveSchwarzFilter_def.hpp | 729 ++++++++++++++++++ packages/ifpack2/src/Ifpack2_IlukGraph.hpp | 1 + .../src/Ifpack2_OverlappingRowMatrix_decl.hpp | 11 +- .../src/Ifpack2_OverlappingRowMatrix_def.hpp | 19 +- .../Ifpack2_UnitTestAdditiveSchwarz.cpp | 2 + ...k2_UnitTestLocalSparseTriangularSolver.cpp | 13 - .../tpetra/core/src/Tpetra_CrsGraph_decl.hpp | 4 +- .../tpetra/core/src/Tpetra_CrsGraph_def.hpp | 2 + 12 files changed, 1460 insertions(+), 184 deletions(-) create mode 100644 packages/ifpack2/src/Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp create mode 100644 packages/ifpack2/src/Ifpack2_Details_AdditiveSchwarzFilter_def.hpp diff --git a/packages/ifpack2/src/CMakeLists.txt b/packages/ifpack2/src/CMakeLists.txt index bbb81af997d3..d16b67b28e6c 100644 --- a/packages/ifpack2/src/CMakeLists.txt +++ b/packages/ifpack2/src/CMakeLists.txt @@ -248,6 +248,7 @@ IF(Ifpack2_ENABLE_EXPLICIT_INSTANTIATION) SparsityFilter ContainerFactory TriDiContainer + Details::AdditiveSchwarzFilter Details::Chebyshev Details::ChebyshevKernel Details::DenseSolver diff --git a/packages/ifpack2/src/Ifpack2_AdditiveSchwarz_decl.hpp b/packages/ifpack2/src/Ifpack2_AdditiveSchwarz_decl.hpp index 9d75ee4fa59c..8c0a135e5451 100644 --- a/packages/ifpack2/src/Ifpack2_AdditiveSchwarz_decl.hpp +++ b/packages/ifpack2/src/Ifpack2_AdditiveSchwarz_decl.hpp @@ -56,6 +56,7 @@ #include "Ifpack2_Preconditioner.hpp" #include "Ifpack2_Details_CanChangeMatrix.hpp" #include "Ifpack2_Details_NestedPreconditioner.hpp" +#include "Ifpack2_OverlappingRowMatrix.hpp" #include "Tpetra_Map.hpp" #include "Tpetra_MultiVector.hpp" #include "Tpetra_RowMatrix.hpp" @@ -329,6 +330,12 @@ class AdditiveSchwarz : using row_matrix_type = Tpetra::RowMatrix; + + //! The Tpetra::CrsMatrix specialization that is a subclass of MatrixType. + using crs_matrix_type = + Tpetra::CrsMatrix; + //@} // \name Constructors and destructor //@{ @@ -774,7 +781,7 @@ class AdditiveSchwarz : /// \brief The overlapping matrix. /// /// If nonnull, this is an instance of OverlappingRowMatrix. - Teuchos::RCP OverlappingMatrix_; + Teuchos::RCP> OverlappingMatrix_; //! The reordered matrix. Teuchos::RCP ReorderedLocalizedMatrix_; diff --git a/packages/ifpack2/src/Ifpack2_AdditiveSchwarz_def.hpp b/packages/ifpack2/src/Ifpack2_AdditiveSchwarz_def.hpp index 58beacd301a7..91471e0f96d1 100644 --- a/packages/ifpack2/src/Ifpack2_AdditiveSchwarz_def.hpp +++ b/packages/ifpack2/src/Ifpack2_AdditiveSchwarz_def.hpp @@ -69,11 +69,11 @@ #endif #include "Ifpack2_Details_CanChangeMatrix.hpp" -#include "Ifpack2_LocalFilter.hpp" -#include "Ifpack2_OverlappingRowMatrix.hpp" #include "Ifpack2_Parameters.hpp" +#include "Ifpack2_LocalFilter.hpp" #include "Ifpack2_ReorderFilter.hpp" #include "Ifpack2_SingletonFilter.hpp" +#include "Ifpack2_Details_AdditiveSchwarzFilter.hpp" #ifdef HAVE_MPI #include "Teuchos_DefaultMpiComm.hpp" @@ -463,24 +463,14 @@ apply (const Tpetra::MultiVector overlap_mat_type; - RCP overlapMatrix; + // do communication if necessary if (IsOverlapping_) { - overlapMatrix = rcp_dynamic_cast (OverlappingMatrix_); TEUCHOS_TEST_FOR_EXCEPTION - (overlapMatrix.is_null (), std::logic_error, prefix << + (OverlappingMatrix_.is_null (), std::logic_error, prefix << "IsOverlapping_ is true, but OverlappingMatrix_, while nonnull, is " "not an OverlappingRowMatrix. Please report this " "bug to the Ifpack2 developers."); - } - - // do communication if necessary - if (IsOverlapping_) { - TEUCHOS_TEST_FOR_EXCEPTION - (overlapMatrix.is_null (), std::logic_error, prefix - << "overlapMatrix is null when it shouldn't be. " - "Please report this bug to the Ifpack2 developers."); - overlapMatrix->importMultiVector (*R, *OverlappingB, Tpetra::INSERT); + OverlappingMatrix_->importMultiVector (*R, *OverlappingB, Tpetra::INSERT); //JJH We don't need to import the solution Y we are always solving AY=R with initial guess zero //if (ZeroStartingSolution_ == false) @@ -556,10 +546,10 @@ apply (const Tpetra::MultiVectorexportMultiVector (*OverlappingY, *C, CombineMode_); + OverlappingMatrix_->exportMultiVector (*OverlappingY, *C, CombineMode_); } else { // mfh 16 Apr 2014: Make a view of Y with the same Map as @@ -630,63 +620,83 @@ localApply (MV& OverlappingB, MV& OverlappingY) const using Teuchos::rcp_dynamic_cast; const size_t numVectors = OverlappingB.getNumVectors (); - if (FilterSingletons_) { - // process singleton filter - MV ReducedB (SingletonMatrix_->getRowMap (), numVectors); - MV ReducedY (SingletonMatrix_->getRowMap (), numVectors); - RCP > singletonFilter = - rcp_dynamic_cast > (SingletonMatrix_); - TEUCHOS_TEST_FOR_EXCEPTION - (! SingletonMatrix_.is_null () && singletonFilter.is_null (), - std::logic_error, "Ifpack2::AdditiveSchwarz::localApply: " - "SingletonFilter_ is nonnull but is not a SingletonFilter" - ". This should never happen. Please report this bug " - "to the Ifpack2 developers."); - singletonFilter->SolveSingletons (OverlappingB, OverlappingY); - singletonFilter->CreateReducedRHS (OverlappingY, OverlappingB, ReducedB); - - // process reordering - if (! UseReordering_) { - Inverse_->solve (ReducedY, ReducedB); - } - else { - RCP > rf = - rcp_dynamic_cast > (ReorderedLocalizedMatrix_); + auto additiveSchwarzFilter = rcp_dynamic_cast>(innerMatrix_); + if(additiveSchwarzFilter) + { + //Create the reduced system innerMatrix_ * ReducedY = ReducedB. + //This effectively fuses 3 tasks: + // -SingletonFilter::SolveSingletons (solve entries of OverlappingY corresponding to singletons) + // -SingletonFilter::CreateReducedRHS (fill ReducedReorderedB from OverlappingB, with entries in singleton columns eliminated) + // -ReorderFilter::permuteOriginalToReordered (apply permutation to ReducedReorderedB) + MV ReducedReorderedB (additiveSchwarzFilter->getRowMap(), numVectors); + MV ReducedReorderedY (additiveSchwarzFilter->getRowMap(), numVectors); + additiveSchwarzFilter->CreateReducedProblem(OverlappingB, OverlappingY, ReducedReorderedB); + //Apply inner solver + Inverse_->solve (ReducedReorderedY, ReducedReorderedB); + //Scatter ReducedY back to non-singleton rows of OverlappingY, according to the reordering. + additiveSchwarzFilter->UpdateLHS(ReducedReorderedY, OverlappingY); + } + else + { + if (FilterSingletons_) { + // process singleton filter + MV ReducedB (SingletonMatrix_->getRowMap (), numVectors); + MV ReducedY (SingletonMatrix_->getRowMap (), numVectors); + + RCP > singletonFilter = + rcp_dynamic_cast > (SingletonMatrix_); TEUCHOS_TEST_FOR_EXCEPTION - (! ReorderedLocalizedMatrix_.is_null () && rf.is_null (), std::logic_error, - "Ifpack2::AdditiveSchwarz::localApply: ReorderedLocalizedMatrix_ is " - "nonnull but is not a ReorderFilter. This should " - "never happen. Please report this bug to the Ifpack2 developers."); - MV ReorderedB (ReducedB, Teuchos::Copy); - MV ReorderedY (ReducedY, Teuchos::Copy); - rf->permuteOriginalToReordered (ReducedB, ReorderedB); - Inverse_->solve (ReorderedY, ReorderedB); - rf->permuteReorderedToOriginal (ReorderedY, ReducedY); - } + (! SingletonMatrix_.is_null () && singletonFilter.is_null (), + std::logic_error, "Ifpack2::AdditiveSchwarz::localApply: " + "SingletonFilter_ is nonnull but is not a SingletonFilter" + ". This should never happen. Please report this bug " + "to the Ifpack2 developers."); + singletonFilter->SolveSingletons (OverlappingB, OverlappingY); + singletonFilter->CreateReducedRHS (OverlappingY, OverlappingB, ReducedB); + + // process reordering + if (! UseReordering_) { + Inverse_->solve (ReducedY, ReducedB); + } + else { + RCP > rf = + rcp_dynamic_cast > (ReorderedLocalizedMatrix_); + TEUCHOS_TEST_FOR_EXCEPTION + (! ReorderedLocalizedMatrix_.is_null () && rf.is_null (), std::logic_error, + "Ifpack2::AdditiveSchwarz::localApply: ReorderedLocalizedMatrix_ is " + "nonnull but is not a ReorderFilter. This should " + "never happen. Please report this bug to the Ifpack2 developers."); + MV ReorderedB (ReducedB, Teuchos::Copy); + MV ReorderedY (ReducedY, Teuchos::Copy); + rf->permuteOriginalToReordered (ReducedB, ReorderedB); + Inverse_->solve (ReorderedY, ReorderedB); + rf->permuteReorderedToOriginal (ReorderedY, ReducedY); + } - // finish up with singletons - singletonFilter->UpdateLHS (ReducedY, OverlappingY); - } - else { - // process reordering - if (! UseReordering_) { - Inverse_->solve (OverlappingY, OverlappingB); + // finish up with singletons + singletonFilter->UpdateLHS (ReducedY, OverlappingY); } else { - MV ReorderedB (OverlappingB, Teuchos::Copy); - MV ReorderedY (OverlappingY, Teuchos::Copy); + // process reordering + if (! UseReordering_) { + Inverse_->solve (OverlappingY, OverlappingB); + } + else { + MV ReorderedB (OverlappingB, Teuchos::Copy); + MV ReorderedY (OverlappingY, Teuchos::Copy); - RCP > rf = - rcp_dynamic_cast > (ReorderedLocalizedMatrix_); - TEUCHOS_TEST_FOR_EXCEPTION - (! ReorderedLocalizedMatrix_.is_null () && rf.is_null (), std::logic_error, - "Ifpack2::AdditiveSchwarz::localApply: ReorderedLocalizedMatrix_ is " - "nonnull but is not a ReorderFilter. This should " - "never happen. Please report this bug to the Ifpack2 developers."); - rf->permuteOriginalToReordered (OverlappingB, ReorderedB); - Inverse_->solve (ReorderedY, ReorderedB); - rf->permuteReorderedToOriginal (ReorderedY, OverlappingY); + RCP > rf = + rcp_dynamic_cast > (ReorderedLocalizedMatrix_); + TEUCHOS_TEST_FOR_EXCEPTION + (! ReorderedLocalizedMatrix_.is_null () && rf.is_null (), std::logic_error, + "Ifpack2::AdditiveSchwarz::localApply: ReorderedLocalizedMatrix_ is " + "nonnull but is not a ReorderFilter. This should " + "never happen. Please report this bug to the Ifpack2 developers."); + rf->permuteOriginalToReordered (OverlappingB, ReorderedB); + Inverse_->solve (ReorderedY, ReorderedB); + rf->permuteReorderedToOriginal (ReorderedY, OverlappingY); + } } } } @@ -1015,7 +1025,6 @@ void AdditiveSchwarz::initialize () InitializeTime_ += (timer->wallTime() - startTime); } - template bool AdditiveSchwarz::isInitialized () const { @@ -1056,6 +1065,23 @@ void AdditiveSchwarz::compute () } double startTime = timer->wallTime(); + // compute () assumes that the values of Matrix_ (aka A) have changed. + // If this has overlap, do an import from the input matrix to the halo. + if (IsOverlapping_) { + OverlappingMatrix_->doExtImport(); + } + // At this point, either Matrix_ or OverlappingMatrix_ (depending on whether this is overlapping) + // has new values and unchanged structure. If we are using AdditiveSchwarzFilter, update the local matrix. + // + if(auto asf = Teuchos::rcp_dynamic_cast>(innerMatrix_)) + { + //NOTE: if this compute() call comes right after the initialize() with no intervening matrix changes, this call is redundant. + //initialize() already filled the local matrix. However, we have no way to tell if this is the case. + asf->updateMatrixValues(); + } + //Now, whether the Inverse_'s matrix is the AdditiveSchwarzFilter's local matrix or simply Matrix_/OverlappingMatrix_, + //it will be able to see the new values and update itself accordingly. + { // Start timing here. TimeMonitor timeMon (*timer); @@ -1317,114 +1343,212 @@ void AdditiveSchwarz::setup () "initialize: The matrix to precondition is null. You must either pass " "a nonnull matrix to the constructor, or call setMatrix() with a nonnull " "input, before you may call this method."); + + RCP innerPrecMatrix; - // Localized version of Matrix_ or OverlappingMatrix_. - RCP LocalizedMatrix; + //If the matrix is a CrsMatrix or OverlappingRowMatrix, use the high-performance + //AdditiveSchwarzFilter. Otherwise, use composition of Reordered/Singleton/LocalFilter. + auto matrixCrs = rcp_dynamic_cast(Matrix_); + if(!OverlappingMatrix_.is_null() || !matrixCrs.is_null()) + { + ArrayRCP perm; + ArrayRCP revperm; + if (UseReordering_) { +#if defined(HAVE_IFPACK2_XPETRA) && defined(HAVE_IFPACK2_ZOLTAN2) + // Unlike Ifpack, Zoltan2 does all the dirty work here. + Teuchos::ParameterList zlist = List_.sublist ("schwarz: reordering list"); + ReorderingAlgorithm_ = zlist.get ("order_method", "rcm"); + + if(ReorderingAlgorithm_ == "user") { + // User-provided reordering + perm = zlist.get >("user ordering"); + revperm = zlist.get >("user reverse ordering"); + } + else { + // Zoltan2 reordering + typedef Tpetra::RowGraph + row_graph_type; + typedef Zoltan2::TpetraRowGraphAdapter z2_adapter_type; + auto constActiveGraph = Teuchos::rcp_const_cast( + IsOverlapping_ ? OverlappingMatrix_->getGraph() : Matrix_->getGraph()); + z2_adapter_type Zoltan2Graph (constActiveGraph); + + typedef Zoltan2::OrderingProblem ordering_problem_type; +#ifdef HAVE_MPI + // Grab the MPI Communicator and build the ordering problem with that + MPI_Comm myRawComm; + + RCP > mpicomm = + rcp_dynamic_cast > (Matrix_->getComm ()); + if (mpicomm == Teuchos::null) { + myRawComm = MPI_COMM_SELF; + } else { + myRawComm = * (mpicomm->getRawMpiComm ()); + } + ordering_problem_type MyOrderingProblem (&Zoltan2Graph, &zlist, myRawComm); +#else + ordering_problem_type MyOrderingProblem (&Zoltan2Graph, &zlist); +#endif + MyOrderingProblem.solve (); - // The "most current local matrix." At the end of this method, this - // will be handed off to the inner solver. - RCP ActiveMatrix; + { + typedef Zoltan2::LocalOrderingSolution + ordering_solution_type; - // Create localized matrix. - if (! OverlappingMatrix_.is_null ()) { - LocalizedMatrix = rcp (new LocalFilter (OverlappingMatrix_)); - } - else { - LocalizedMatrix = rcp (new LocalFilter (Matrix_)); - } + ordering_solution_type sol (*MyOrderingProblem.getLocalOrderingSolution()); - // Sanity check; I don't trust the logic above to have created LocalizedMatrix. - TEUCHOS_TEST_FOR_EXCEPTION( - LocalizedMatrix.is_null (), std::logic_error, - "Ifpack2::AdditiveSchwarz::setup: LocalizedMatrix is null, after the code " - "that claimed to have created it. This should never be the case. Please " - "report this bug to the Ifpack2 developers."); - - // Mark localized matrix as active - ActiveMatrix = LocalizedMatrix; - - // Singleton Filtering - if (FilterSingletons_) { - SingletonMatrix_ = rcp (new SingletonFilter (LocalizedMatrix)); - ActiveMatrix = SingletonMatrix_; + // perm[i] gives the where OLD index i shows up in the NEW + // ordering. revperm[i] gives the where NEW index i shows + // up in the OLD ordering. Note that perm is actually the + // "inverse permutation," in Zoltan2 terms. + perm = sol.getPermutationRCPConst (true); + revperm = sol.getPermutationRCPConst (); + } + } +#else + // This is a logic_error, not a runtime_error, because + // setParameters() should have excluded this case already. + TEUCHOS_TEST_FOR_EXCEPTION( + true, std::logic_error, "Ifpack2::AdditiveSchwarz::setup: " + "The Zoltan2 and Xpetra packages must be enabled in order " + "to support reordering."); +#endif + } + else + { + local_ordinal_type numLocalRows = OverlappingMatrix_.is_null() ? matrixCrs->getLocalNumRows() : OverlappingMatrix_->getLocalNumRows(); + //Use an identity ordering. + //TODO: create a non-permuted code path in AdditiveSchwarzFilter, in the case that neither + //reordering nor singleton filtering are enabled. In this situation it's like LocalFilter. + perm = ArrayRCP(numLocalRows); + revperm = ArrayRCP(numLocalRows); + for(local_ordinal_type i = 0; i < numLocalRows; i++) + { + perm[i] = i; + revperm[i] = i; + } + } + //Now, construct the filter + RCP> asf; + if(OverlappingMatrix_.is_null()) + asf = rcp(new Details::AdditiveSchwarzFilter(matrixCrs, perm, revperm, FilterSingletons_)); + else + asf = rcp(new Details::AdditiveSchwarzFilter(OverlappingMatrix_, perm, revperm, FilterSingletons_)); + innerMatrix_ = asf; + //The matrix for the inner preconditioner is a CrsMatrix + innerPrecMatrix = asf->getFilteredMatrix(); } + else + { + // Localized version of Matrix_ or OverlappingMatrix_. + RCP LocalizedMatrix; - // Do reordering - if (UseReordering_) { -#if defined(HAVE_IFPACK2_XPETRA) && defined(HAVE_IFPACK2_ZOLTAN2) - // Unlike Ifpack, Zoltan2 does all the dirty work here. - typedef ReorderFilter reorder_filter_type; - Teuchos::ParameterList zlist = List_.sublist ("schwarz: reordering list"); - ReorderingAlgorithm_ = zlist.get ("order_method", "rcm"); - - ArrayRCP perm; - ArrayRCP revperm; + // The "most current local matrix." At the end of this method, this + // will be handed off to the inner solver. + RCP ActiveMatrix; - if(ReorderingAlgorithm_ == "user") { - // User-provided reordering - perm = zlist.get >("user ordering"); - revperm = zlist.get >("user reverse ordering"); + // Create localized matrix. + if (! OverlappingMatrix_.is_null ()) { + LocalizedMatrix = rcp (new LocalFilter (OverlappingMatrix_)); } else { - // Zoltan2 reordering - typedef Tpetra::RowGraph - row_graph_type; - typedef Zoltan2::TpetraRowGraphAdapter z2_adapter_type; - RCP constActiveGraph = - Teuchos::rcp_const_cast(ActiveMatrix->getGraph()); - z2_adapter_type Zoltan2Graph (constActiveGraph); - - typedef Zoltan2::OrderingProblem ordering_problem_type; -#ifdef HAVE_MPI - // Grab the MPI Communicator and build the ordering problem with that - MPI_Comm myRawComm; + LocalizedMatrix = rcp (new LocalFilter (Matrix_)); + } - RCP > mpicomm = - rcp_dynamic_cast > (ActiveMatrix->getComm ()); - if (mpicomm == Teuchos::null) { - myRawComm = MPI_COMM_SELF; - } else { - myRawComm = * (mpicomm->getRawMpiComm ()); + // Sanity check; I don't trust the logic above to have created LocalizedMatrix. + TEUCHOS_TEST_FOR_EXCEPTION( + LocalizedMatrix.is_null (), std::logic_error, + "Ifpack2::AdditiveSchwarz::setup: LocalizedMatrix is null, after the code " + "that claimed to have created it. This should never be the case. Please " + "report this bug to the Ifpack2 developers."); + + // Mark localized matrix as active + ActiveMatrix = LocalizedMatrix; + + // Singleton Filtering + if (FilterSingletons_) { + SingletonMatrix_ = rcp (new SingletonFilter (LocalizedMatrix)); + ActiveMatrix = SingletonMatrix_; + } + + // Do reordering + if (UseReordering_) { +#if defined(HAVE_IFPACK2_XPETRA) && defined(HAVE_IFPACK2_ZOLTAN2) + // Unlike Ifpack, Zoltan2 does all the dirty work here. + typedef ReorderFilter reorder_filter_type; + Teuchos::ParameterList zlist = List_.sublist ("schwarz: reordering list"); + ReorderingAlgorithm_ = zlist.get ("order_method", "rcm"); + + ArrayRCP perm; + ArrayRCP revperm; + + if(ReorderingAlgorithm_ == "user") { + // User-provided reordering + perm = zlist.get >("user ordering"); + revperm = zlist.get >("user reverse ordering"); } - ordering_problem_type MyOrderingProblem (&Zoltan2Graph, &zlist, myRawComm); + else { + // Zoltan2 reordering + typedef Tpetra::RowGraph + row_graph_type; + typedef Zoltan2::TpetraRowGraphAdapter z2_adapter_type; + RCP constActiveGraph = + Teuchos::rcp_const_cast(ActiveMatrix->getGraph()); + z2_adapter_type Zoltan2Graph (constActiveGraph); + + typedef Zoltan2::OrderingProblem ordering_problem_type; +#ifdef HAVE_MPI + // Grab the MPI Communicator and build the ordering problem with that + MPI_Comm myRawComm; + + RCP > mpicomm = + rcp_dynamic_cast > (ActiveMatrix->getComm ()); + if (mpicomm == Teuchos::null) { + myRawComm = MPI_COMM_SELF; + } else { + myRawComm = * (mpicomm->getRawMpiComm ()); + } + ordering_problem_type MyOrderingProblem (&Zoltan2Graph, &zlist, myRawComm); #else - ordering_problem_type MyOrderingProblem (&Zoltan2Graph, &zlist); + ordering_problem_type MyOrderingProblem (&Zoltan2Graph, &zlist); #endif - MyOrderingProblem.solve (); + MyOrderingProblem.solve (); - { - typedef Zoltan2::LocalOrderingSolution - ordering_solution_type; + { + typedef Zoltan2::LocalOrderingSolution + ordering_solution_type; - ordering_solution_type sol (*MyOrderingProblem.getLocalOrderingSolution()); + ordering_solution_type sol (*MyOrderingProblem.getLocalOrderingSolution()); - // perm[i] gives the where OLD index i shows up in the NEW - // ordering. revperm[i] gives the where NEW index i shows - // up in the OLD ordering. Note that perm is actually the - // "inverse permutation," in Zoltan2 terms. - perm = sol.getPermutationRCPConst (true); - revperm = sol.getPermutationRCPConst (); + // perm[i] gives the where OLD index i shows up in the NEW + // ordering. revperm[i] gives the where NEW index i shows + // up in the OLD ordering. Note that perm is actually the + // "inverse permutation," in Zoltan2 terms. + perm = sol.getPermutationRCPConst (true); + revperm = sol.getPermutationRCPConst (); + } } - } - // All reorderings here... - ReorderedLocalizedMatrix_ = rcp (new reorder_filter_type (ActiveMatrix, perm, revperm)); + // All reorderings here... + ReorderedLocalizedMatrix_ = rcp (new reorder_filter_type (ActiveMatrix, perm, revperm)); - ActiveMatrix = ReorderedLocalizedMatrix_; + ActiveMatrix = ReorderedLocalizedMatrix_; #else - // This is a logic_error, not a runtime_error, because - // setParameters() should have excluded this case already. - TEUCHOS_TEST_FOR_EXCEPTION( - true, std::logic_error, "Ifpack2::AdditiveSchwarz::setup: " - "The Zoltan2 and Xpetra packages must be enabled in order " - "to support reordering."); + // This is a logic_error, not a runtime_error, because + // setParameters() should have excluded this case already. + TEUCHOS_TEST_FOR_EXCEPTION( + true, std::logic_error, "Ifpack2::AdditiveSchwarz::setup: " + "The Zoltan2 and Xpetra packages must be enabled in order " + "to support reordering."); #endif + } + innerMatrix_ = ActiveMatrix; + //The matrix for the inner preconditioner is just the reordered/localized matrix using filters + innerPrecMatrix = innerMatrix_; } - innerMatrix_ = ActiveMatrix; - TEUCHOS_TEST_FOR_EXCEPTION( - innerMatrix_.is_null (), std::logic_error, "Ifpack2::AdditiveSchwarz::" + innerMatrix_.is_null () || innerPrecMatrix.is_null(), std::logic_error, "Ifpack2::AdditiveSchwarz::" "setup: Inner matrix is null right before constructing inner solver. " "Please report this bug to the Ifpack2 developers."); @@ -1460,7 +1584,7 @@ void AdditiveSchwarz::setup () innerPrec.is_null (), std::logic_error, "Ifpack2::AdditiveSchwarz::setup: Failed to create inner preconditioner " "with name \"" << innerName << "\"."); - innerPrec->setMatrix (innerMatrix_); + innerPrec->setMatrix (innerPrecMatrix); // Extract and apply the sublist of parameters to give to the // inner solver, if there is such a sublist of parameters. @@ -1476,7 +1600,7 @@ void AdditiveSchwarz::setup () // The new inner matrix is different from the inner // preconditioner's current matrix, so give the inner // preconditioner the new inner matrix. - Inverse_->setMatrix (innerMatrix_); + Inverse_->setMatrix (innerPrecMatrix); } TEUCHOS_TEST_FOR_EXCEPTION( Inverse_.is_null (), std::logic_error, "Ifpack2::AdditiveSchwarz::" @@ -1523,7 +1647,10 @@ setInnerPreconditioner (const Teuchos::RCPsetMatrix (innerMatrix_); + if(auto asf = Teuchos::rcp_dynamic_cast>(innerMatrix_)) + innerSolver->setMatrix (asf->getFilteredMatrix()); + else + innerSolver->setMatrix (innerMatrix_); // If the user previously specified a parameter for the inner // preconditioner's type, then clear out that parameter and its diff --git a/packages/ifpack2/src/Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp b/packages/ifpack2/src/Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp new file mode 100644 index 000000000000..8537c4d04866 --- /dev/null +++ b/packages/ifpack2/src/Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp @@ -0,0 +1,412 @@ + +/*@HEADER +// *********************************************************************** +// +// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package +// Copyright (2009) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// *********************************************************************** +//@HEADER +*/ + +#ifndef IFPACK2_ADDITIVE_SCHWARZ_FILTER_DECL_HPP +#define IFPACK2_ADDITIVE_SCHWARZ_FILTER_DECL_HPP + +/*! +\class AdditiveSchwarzFilter +\brief Wraps a Tpetra::CrsMatrix or Ifpack2::OverlappingRowMatrix in a filter that removes off-process edges, may reorder rows/columns, and may remove singletons (rows with no connections to other rows). + +This is essentially a fusion of LocalFilter, AdditiveSchwarzFilter and SingletonFilter. These three filters are used together in AdditiveSchwarz. +*/ + +#include "Ifpack2_ConfigDefs.hpp" +#include "Ifpack2_Details_RowMatrix.hpp" +#include "Ifpack2_OverlappingRowMatrix.hpp" + +namespace Ifpack2 +{ +namespace Details +{ + template + class AdditiveSchwarzFilter : + public Ifpack2::Details::RowMatrix { +public: + typedef typename MatrixType::scalar_type scalar_type; + typedef typename Kokkos::ArithTraits::val_type impl_scalar_type; + typedef typename MatrixType::local_ordinal_type local_ordinal_type; + typedef typename MatrixType::global_ordinal_type global_ordinal_type; + typedef typename MatrixType::node_type node_type; + typedef typename MatrixType::global_inds_host_view_type global_inds_host_view_type; + typedef typename MatrixType::local_inds_host_view_type local_inds_host_view_type; + typedef typename MatrixType::values_host_view_type values_host_view_type; + + typedef typename MatrixType::nonconst_global_inds_host_view_type nonconst_global_inds_host_view_type; + typedef typename MatrixType::nonconst_local_inds_host_view_type nonconst_local_inds_host_view_type; + typedef typename MatrixType::nonconst_values_host_view_type nonconst_values_host_view_type; + + typedef typename Teuchos::ScalarTraits::magnitudeType magnitude_type; + typedef Tpetra::RowMatrix row_matrix_type; + typedef Tpetra::CrsMatrix crs_matrix_type; + typedef Tpetra::MultiVector mv_type; + typedef typename crs_matrix_type::device_type device_type; + typedef typename crs_matrix_type::execution_space execution_space; + typedef Kokkos::RangePolicy policy_type; + typedef Kokkos::MDRangePolicy> policy_2d_type; + typedef Ifpack2::OverlappingRowMatrix overlapping_matrix_type; + typedef typename crs_matrix_type::local_matrix_device_type local_matrix_type; + typedef typename local_matrix_type::size_type size_type; + typedef typename local_matrix_type::row_map_type::non_const_type row_map_type; + typedef typename local_matrix_type::index_type entries_type; + typedef typename local_matrix_type::values_type values_type; + typedef typename row_map_type::HostMirror host_row_map_type; + typedef typename entries_type::HostMirror host_entries_type; + typedef typename values_type::HostMirror host_values_type; + typedef typename local_matrix_type::HostMirror host_local_matrix_type; + + static_assert(std::is_same::value, "Ifpack2::AdditiveSchwarzFilter: The template parameter MatrixType must be a Tpetra::RowMatrix specialization. Please don't use Tpetra::CrsMatrix (a subclass of Tpetra::RowMatrix) here anymore. The constructor can take either a RowMatrix or a CrsMatrix just fine."); + + typedef Tpetra::Map map_type; + + typedef typename row_matrix_type::mag_type mag_type; + + //! \name Constructor & destructor methods + //@{ + + /// \brief Constructor. + /// + /// \param A [in] The matrix to which to apply the filter. + // Must be a Tpetra::CrsMatrix or an Ifpack2::OverlappingRowMatrix that wraps a Tpetra::CrsMatrix. + /// \param perm [in] Forward permutation of A's rows and columns. + /// \param reverseperm [in] Reverse permutation of A's rows and columns. + /// \param filterSingletons [in] If true, remove rows that have no local neighbors. + /// + /// It must make sense to apply the given permutation to both the + /// rows and columns. This means that the row and column Maps must + /// have the same numbers of entries on all processes, and must have + /// the same order of GIDs on all processes. + /// + /// perm[i] gives the where OLD index i shows up in the NEW + /// ordering. revperm[i] gives the where NEW index i shows up in + /// the OLD ordering. Note that perm is actually the "inverse + /// permutation," in Zoltan2 terms. + AdditiveSchwarzFilter(const Teuchos::RCP& A, + const Teuchos::ArrayRCP& perm, + const Teuchos::ArrayRCP& reverseperm, + bool filterSingletons); + + //! Update the filtered matrix in response to A's values changing (A's structure isn't allowed to change, though) + void updateMatrixValues(); + + Teuchos::RCP getFilteredMatrix() const; + + //! Destructor. + virtual ~AdditiveSchwarzFilter (); + + //@} + //! \name Matrix query methods + //@{ + + //! The matrix's communicator. + virtual Teuchos::RCP > getComm() const; + + //! Returns the Map that describes the row distribution in this matrix. + virtual Teuchos::RCP getRowMap() const; + + //! Returns the Map that describes the column distribution in this matrix. + virtual Teuchos::RCP getColMap() const; + + //! Returns the Map that describes the domain distribution in this matrix. + virtual Teuchos::RCP getDomainMap() const; + + //! Returns the Map that describes the range distribution in this matrix. + virtual Teuchos::RCP getRangeMap() const; + + //! Returns the RowGraph associated with this matrix. + virtual Teuchos::RCP > getGraph() const; + + //! Returns the number of global rows in this matrix. + virtual global_size_t getGlobalNumRows() const; + + //! \brief Returns the number of global columns in this matrix. + virtual global_size_t getGlobalNumCols() const; + + //! Returns the number of rows owned on the calling node. + virtual size_t getLocalNumRows() const; + + //! Returns the number of columns needed to apply the forward operator on this node, i.e., the number of elements listed in the column map. + virtual size_t getLocalNumCols() const; + + //! Returns the index base for global indices for this matrix. + virtual global_ordinal_type getIndexBase() const; + + //! Returns the global number of entries in this matrix. + virtual global_size_t getGlobalNumEntries() const; + + //! Returns the local number of entries in this matrix. + virtual size_t getLocalNumEntries() const; + + /// \brief The current number of entries in this matrix, stored on + /// the calling process, in the row whose global index is \c globalRow. + /// + /// \return The number of entries, or + /// Teuchos::OrdinalTraits::invalid() if the specified row + /// is not owned by the calling process. + virtual size_t getNumEntriesInGlobalRow (global_ordinal_type globalRow) const; + + /// \brief The current number of entries in this matrix, stored on + /// the calling process, in the row whose local index is \c globalRow. + /// + /// \return The number of entries, or + /// Teuchos::OrdinalTraits::invalid() if the specified row + /// is not owned by the calling process. + virtual size_t getNumEntriesInLocalRow (local_ordinal_type localRow) const; + + //! \brief Returns the maximum number of entries across all rows/columns on all nodes. + virtual size_t getGlobalMaxNumRowEntries() const; + + //! \brief Returns the maximum number of entries across all rows/columns on this node. + virtual size_t getLocalMaxNumRowEntries() const; + + //! \brief Indicates whether this matrix has a well-defined column map. + virtual bool hasColMap() const; + + //! \brief If matrix indices are in the local range, this function returns true. Otherwise, this function returns false. */ + virtual bool isLocallyIndexed() const; + + //! \brief If matrix indices are in the global range, this function returns true. Otherwise, this function returns false. */ + virtual bool isGloballyIndexed() const; + + //! Returns \c true if fillComplete() has been called. + virtual bool isFillComplete() const; + + //! Returns \c true if RowViews are supported. + virtual bool supportsRowViews() const; + + //@} + + //! @name Extraction Methods + //@{ + + //! Extract a list of entries in a specified global row of this matrix. Put into pre-allocated storage. + /*! + \param GlobalRow - (In) Global row number for which indices are desired. + \param Indices - (Out) Global column indices corresponding to values. + \param Values - (Out) Matrix values. + \param NumEntries - (Out) Number of indices. + + Note: A std::runtime_error exception is thrown if either \c Indices or \c Values is not large enough to hold the data associated + with row \c GlobalRow. If \c GlobalRow does not belong to this node, then \c Indices and \c Values are unchanged and \c NumIndices is + returned as Teuchos::OrdinalTraits::invalid(). + */ + virtual void + getGlobalRowCopy (global_ordinal_type GlobalRow, + nonconst_global_inds_host_view_type &Indices, + nonconst_values_host_view_type &Values, + size_t& NumEntries) const; + //! Extract a list of entries in a specified local row of the graph. Put into storage allocated by calling routine. + /*! + \param LocalRow - (In) Local row number for which indices are desired. + \param Indices - (Out) Local column indices corresponding to values. + \param Values - (Out) Matrix values. + \param NumIndices - (Out) Number of indices. + + Note: A std::runtime_error exception is thrown if either \c Indices or \c Values is not large enough to hold the data associated + with row \c LocalRow. If \c LocalRow is not valid for this node, then \c Indices and \c Values are unchanged and \c NumIndices is + returned as Teuchos::OrdinalTraits::invalid(). + */ + virtual void + getLocalRowCopy (local_ordinal_type LocalRow, + nonconst_local_inds_host_view_type &Indices, + nonconst_values_host_view_type &Values, + size_t& NumEntries) const; + + //! Extract a const, non-persisting view of global indices in a specified row of the matrix. + /*! + \param GlobalRow - (In) Global row number for which indices are desired. + \param Indices - (Out) Global column indices corresponding to values. + \param Values - (Out) Row values + \post indices.size() == getNumEntriesInGlobalRow(GlobalRow) + \pre isLocallyIndexed() == false + Note: If \c GlobalRow does not belong to this node, then \c indices is set to null. + */ + virtual void + getGlobalRowView (global_ordinal_type GlobalRow, + global_inds_host_view_type &indices, + values_host_view_type &values) const; + //! Extract a const, non-persisting view of local indices in a specified row of the matrix. + /*! + \param LocalRow - (In) Local row number for which indices are desired. + \param Indices - (Out) Global column indices corresponding to values. + \param Values - (Out) Row values + \pre isGloballyIndexed() == false + \post indices.size() == getNumEntriesInDropRow(LocalRow) + + Note: If \c LocalRow does not belong to this node, then \c indices is set to null. + */ + virtual void + getLocalRowView (local_ordinal_type LocalRow, + local_inds_host_view_type & indices, + values_host_view_type & values) const; + //! \brief Get a copy of the diagonal entries owned by this node, with local row indices. + /*! Returns a distributed Vector object partitioned according to this matrix's row map, containing the + the zero and non-zero diagonals owned by this node. */ + virtual void getLocalDiagCopy(Tpetra::Vector &diag) const; + + //@} + + //! \name Mathematical Methods + //@{ + + /** + * \brief Scales the RowMatrix on the left with the Vector x. + * + * This matrix will be scaled such that A(i,j) = x(i)*A(i,j) + * where i denoes the global row number of A and + * j denotes the global column number of A. + * + * \param x A vector to left scale this matrix. + */ + virtual void leftScale(const Tpetra::Vector& x); + + /** + * \brief Scales the RowMatrix on the right with the Vector x. + * + * This matrix will be scaled such that A(i,j) = x(j)*A(i,j) + * where i denoes the global row number of A and + * j denotes the global column number of A. + * + * \param x A vector to right scale this matrix. + */ + virtual void rightScale(const Tpetra::Vector& x); + + //! Returns the Frobenius norm of the matrix. + /** Computes and returns the Frobenius norm of the matrix, defined as: + \f$ \|A\|_F = \sqrt{\sum_{i,j} \|\a_{ij}\|^2} \f$ + */ + virtual mag_type getFrobeniusNorm() const; + + /// \brief \f$ Y := \beta Y + \alpha Op(A) X \f$, + /// where Op(A) is either A, \f$A^T\f$, or \f$A^H\f$. + /// + /// Apply the reordered version of the matrix (or its transpose or + /// conjugate transpose) to the given multivector X, producing Y. + /// - if beta == 0, apply() must overwrite \c Y, + /// so that any values in \c Y (including NaNs) are ignored. + /// - if alpha == 0, apply() may short-circuit the + /// matrix, so that any values in \c X (including NaNs) are + /// ignored. + /// + /// This method assumes that X and Y are in the reordered order. + /// + /// If hasTransposeApply() returns false, then the only valid value + /// of \c mode is Teuchos::NO_TRANS (the default). Otherwise, it + /// accepts the following values: + /// - mode = Teuchos::NO_TRANS: Op(A) is the reordered version of A. + /// - mode = Teuchos::TRANS: Op(A) is the reordered version of the + /// transpose of A. + /// - mode = Teuchos::CONJ_TRANS: Op(A) is reordered version of + /// the conjugate transpose of A. + virtual void + apply (const Tpetra::MultiVector &X, + Tpetra::MultiVector &Y, + Teuchos::ETransp mode = Teuchos::NO_TRANS, + scalar_type alpha = Teuchos::ScalarTraits::one(), + scalar_type beta = Teuchos::ScalarTraits::zero()) const; + + //! Whether apply() can apply the transpose or conjugate transpose. + virtual bool hasTransposeApply() const; + + //! Solve singleton rows of OverlappingY, then filter and permute OverlappingB to get the reduced B. + void CreateReducedProblem( + const Tpetra::MultiVector& OverlappingB, + Tpetra::MultiVector& OverlappingY, + Tpetra::MultiVector& ReducedReorderedB) const; + + //! Scatter ReducedY back to non-singleton rows of OverlappingY, according to the reordering. + void UpdateLHS( + const Tpetra::MultiVector& ReducedReorderedY, + Tpetra::MultiVector& OverlappingY) const; + + void setup(const Teuchos::RCP& A_unfiltered, + const Teuchos::ArrayRCP& perm, + const Teuchos::ArrayRCP& reverseperm, + bool filterSingletons); + + /// \brief Fill the entries/values in the local matrix, and from that construct A_. + /// + /// Requires that the structure of A_unfiltered_ has not changed since this was constructed. + /// Used by both the constructor and updateMatrixValues(). + void fillLocalMatrix(local_matrix_type localMatrix); + + //@} + +private: + //! Pointer to the matrix to be preconditioned. + Teuchos::RCP A_unfiltered_; + //! Filtered and reordered matrix. + Teuchos::RCP A_; + //! Permutation: Original to reordered. Size is number of local rows in A_unfiltered_. Rows which were filtered from A_ map to -1 (Invalid). + Kokkos::DualView perm_; + //! Permutation: Reordered to original. Size is number of local rows in A_. + Kokkos::DualView reverseperm_; + local_ordinal_type numSingletons_; + //! List of singleton rows (indices are rows in A_unfiltered_) + Kokkos::DualView singletons_; + //! Diagonal values of singletons (used for solving those rows) + Kokkos::DualView singletonDiagonals_; + + //! Whether singletons (disconnected rows) are filtered out of the local matrix + // (apply and solve still process those rows separately) + bool FilterSingletons_; + + //! Row, col, domain and range map of this locally filtered matrix (it's square and non-distributed) + Teuchos::RCP localMap_; +}; + +}} //namespace Ifpack2::Details + +#endif + diff --git a/packages/ifpack2/src/Ifpack2_Details_AdditiveSchwarzFilter_def.hpp b/packages/ifpack2/src/Ifpack2_Details_AdditiveSchwarzFilter_def.hpp new file mode 100644 index 000000000000..0793527b4cc1 --- /dev/null +++ b/packages/ifpack2/src/Ifpack2_Details_AdditiveSchwarzFilter_def.hpp @@ -0,0 +1,729 @@ +/*@HEADER +// *********************************************************************** +// +// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package +// Copyright (2009) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// *********************************************************************** +//@HEADER +*/ + +#ifndef IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP +#define IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP + +#include "Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp" +#include "KokkosKernels_Sorting.hpp" +#include "Kokkos_Bitset.hpp" + +namespace Ifpack2 +{ +namespace Details +{ + +template +AdditiveSchwarzFilter:: +AdditiveSchwarzFilter(const Teuchos::RCP& A_unfiltered, + const Teuchos::ArrayRCP& perm, + const Teuchos::ArrayRCP& reverseperm, + bool filterSingletons) +{ + setup(A_unfiltered, perm, reverseperm, filterSingletons); +} + +template +void AdditiveSchwarzFilter:: +setup(const Teuchos::RCP& A_unfiltered, + const Teuchos::ArrayRCP& perm, + const Teuchos::ArrayRCP& reverseperm, + bool filterSingletons) +{ + using Teuchos::RCP; + using Teuchos::rcp; + using Teuchos::rcp_dynamic_cast; + //Check that A is an instance of allowed types, either: + // - Tpetra::CrsMatrix + // - Ifpack2::OverlappingRowMatrix (which always consists of two CrsMatrices, A_ and ExtMatrix_) + TEUCHOS_TEST_FOR_EXCEPTION( + !rcp_dynamic_cast(A_unfiltered) && + !rcp_dynamic_cast(A_unfiltered), + std::invalid_argument, + "Ifpack2::Details::AdditiveSchwarzFilter: The input matrix must be a Tpetra::CrsMatrix or an Ifpack2::OverlappingRowMatrix"); + A_unfiltered_ = A_unfiltered; + local_matrix_type A_local, A_halo; + bool haveHalo = !rcp_dynamic_cast(A_unfiltered_).is_null(); + if(haveHalo) + { + auto A_overlapping = rcp_dynamic_cast(A_unfiltered_); + A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice(); + A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice(); + } + else + { + auto A_crs = rcp_dynamic_cast(A_unfiltered_); + A_local = A_crs->getLocalMatrixDevice(); + //leave A_halo empty in this case + } + //Check that perm and reversePerm are the same length and match the number of local rows in A + TEUCHOS_TEST_FOR_EXCEPTION( + perm.size() != reverseperm.size(), + std::invalid_argument, + "Ifpack2::Details::AdditiveSchwarzFilter: perm and reverseperm should be the same length"); + TEUCHOS_TEST_FOR_EXCEPTION( + (size_t) perm.size() != (size_t) A_unfiltered_->getLocalNumRows(), + std::invalid_argument, + "Ifpack2::Details::AdditiveSchwarzFilter: length of perm and reverseperm should be the same as the number of local rows in unfiltered A"); + //First, compute the permutation tables (that exclude singletons, if requested) + FilterSingletons_ = filterSingletons; + local_ordinal_type numLocalRows; + local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows(); + if(FilterSingletons_) + { + //Fill singletons and singletonDiagonals (allocate them to the upper bound at first, then shrink it to size) + singletons_ = Kokkos::DualView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "singletons_"), totalLocalRows); + singletonDiagonals_ = Kokkos::DualView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "singletonDiagonals_"), totalLocalRows); + auto singletonsDevice = singletons_.view_device(); + singletons_.modify_device(); + auto singletonDiagonalsDevice = singletonDiagonals_.view_device(); + singletonDiagonals_.modify_device(); + local_ordinal_type numSingletons; + Kokkos::Bitset isSingletonBitset(totalLocalRows); + Kokkos::parallel_scan(policy_type(0, totalLocalRows), + KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type& lnumSingletons, bool finalPass) + { + bool isSingleton = true; + impl_scalar_type singletonValue = Kokkos::ArithTraits::zero(); + if(i < A_local.numRows()) + { + //row i is in original local matrix + size_type rowBegin = A_local.graph.row_map(i); + size_type rowEnd = A_local.graph.row_map(i + 1); + for(size_type j = rowBegin; j < rowEnd; j++) + { + local_ordinal_type entry = A_local.graph.entries(j); + if(entry < totalLocalRows && entry != i) + { + isSingleton = false; + break; + } + if(finalPass && entry == i) + { + //note: using a sum to compute the diagonal value, in case entries are not compressed. + singletonValue += A_local.values(j); + } + } + } + else + { + //row i is in halo + local_ordinal_type row = i - A_local.numRows(); + size_type rowBegin = A_halo.graph.row_map(row); + size_type rowEnd = A_halo.graph.row_map(row + 1); + for(size_type j = rowBegin; j < rowEnd; j++) + { + local_ordinal_type entry = A_halo.graph.entries(j); + if(entry < totalLocalRows && entry != i) + { + isSingleton = false; + break; + } + if(finalPass && entry == i) + { + singletonValue += A_halo.values(j); + } + } + } + if(isSingleton) + { + if(finalPass) + { + isSingletonBitset.set(i); + singletonsDevice(lnumSingletons) = i; + singletonDiagonalsDevice(lnumSingletons) = singletonValue; + } + lnumSingletons++; + } + }, numSingletons); + numSingletons_ = numSingletons; + //Each local row in A_unfiltered is either a singleton or a row in the filtered matrix. + numLocalRows = totalLocalRows - numSingletons_; + //Using the list of singletons, create the reduced permutations. + perm_ = Kokkos::DualView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), totalLocalRows); + perm_.modify_device(); + reverseperm_ = Kokkos::DualView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), numLocalRows); + reverseperm_.modify_device(); + auto permView = perm_.view_device(); + auto reversepermView = reverseperm_.view_device(); + //Get the original inverse permutation on device + Kokkos::View origpermView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "input reverse perm"), totalLocalRows); + Kokkos::View origpermHost(reverseperm.get(), totalLocalRows); + Kokkos::deep_copy(execution_space(), origpermView, origpermHost); + //reverseperm is a list of local rows in A_unfiltered, so filter out the elements of reverseperm where isSingleton is true. Then regenerate the forward permutation. + Kokkos::parallel_scan(policy_type(0, totalLocalRows), + KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type& lindex, bool finalPass) + { + local_ordinal_type origRow = origpermView(i); + if(!isSingletonBitset.test(origRow)) + { + if(finalPass) + { + //mark the mapping in both directions between origRow and lindex (a row in filtered A) + reversepermView(lindex) = origRow; + permView(origRow) = lindex; + } + lindex++; + } + else + { + //origRow is a singleton, so mark a -1 in the new forward permutation to show that it has no corresponding row in filtered A. + if(finalPass) + permView(origRow) = local_ordinal_type(-1); + } + }); + perm_.sync_host(); + reverseperm_.sync_host(); + } + else + { + //We will keep all the local rows, so use the permutation as is + perm_ = Kokkos::DualView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), perm.size()); + perm_.modify_host(); + auto permHost = perm_.view_host(); + for(size_t i = 0; i < (size_t) perm.size(); i++) + permHost(i) = perm[i]; + perm_.sync_device(); + reverseperm_ = Kokkos::DualView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "reverseperm_"), reverseperm.size()); + reverseperm_.modify_host(); + auto reversepermHost = reverseperm_.view_host(); + for(size_t i = 0; i < (size_t) reverseperm.size(); i++) + reversepermHost(i) = reverseperm[i]; + reverseperm_.sync_device(); + numSingletons_ = 0; + numLocalRows = totalLocalRows; + } + auto permView = perm_.view_device(); + auto reversepermView = reverseperm_.view_device(); + //Fill the local matrix of A_ (filtered and permuted) + //First, construct the rowmap by counting the entries in each row + row_map_type localRowptrs(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered rowptrs"), numLocalRows + 1); + Kokkos::parallel_for(policy_type(0, numLocalRows), + KOKKOS_LAMBDA(local_ordinal_type i) + { + //Count the entries that will be in filtered row i. + //This means entries which correspond to local, non-singleton rows/columns. + local_ordinal_type numEntries = 0; + local_ordinal_type origRow = reversepermView(i); + if(origRow < A_local.numRows()) + { + //row i is in original local matrix + size_type rowBegin = A_local.graph.row_map(origRow); + size_type rowEnd = A_local.graph.row_map(origRow + 1); + for(size_type j = rowBegin; j < rowEnd; j++) + { + local_ordinal_type entry = A_local.graph.entries(j); + if(entry < totalLocalRows && permView(entry) != -1) + numEntries++; + } + } + else + { + //row i is in halo + local_ordinal_type row = origRow - A_local.numRows(); + size_type rowBegin = A_halo.graph.row_map(row); + size_type rowEnd = A_halo.graph.row_map(row + 1); + for(size_type j = rowBegin; j < rowEnd; j++) + { + local_ordinal_type entry = A_halo.graph.entries(j); + if(entry < totalLocalRows && permView(entry) != -1) + numEntries++; + } + } + localRowptrs(i) = numEntries; + if(i == numLocalRows - 1) + localRowptrs(i + 1) = 0; + }); + //Prefix sum to finish computing the rowptrs + size_type numLocalEntries; + Kokkos::parallel_scan(policy_type(0, numLocalRows + 1), + KOKKOS_LAMBDA(local_ordinal_type i, size_type& lnumEntries, bool finalPass) + { + size_type numEnt = localRowptrs(i); + if(finalPass) + localRowptrs(i) = lnumEntries; + lnumEntries += numEnt; + }, numLocalEntries); + //Allocate and fill local entries and values + entries_type localEntries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered entries"), numLocalEntries); + values_type localValues(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered values"), numLocalEntries); + //Create the local matrix with the uninitialized entries/values, then fill it. + local_matrix_type localMatrix("AdditiveSchwarzFilter", numLocalRows, numLocalRows, numLocalEntries, localValues, localRowptrs, localEntries); + fillLocalMatrix(localMatrix); + //Create a serial comm and the map for the final filtered CrsMatrix (each process uses its own local map) +#ifdef HAVE_IFPACK2_DEBUG + TEUCHOS_TEST_FOR_EXCEPTION( + ! mapPairsAreFitted (A_unfiltered_), std::invalid_argument, "Ifpack2::LocalFilter: " + "A's Map pairs are not fitted to each other on Process " + << A_->getRowMap ()->getComm ()->getRank () << " of the input matrix's " + "communicator. " + "This means that LocalFilter does not currently know how to work with A. " + "This will change soon. Please see discussion of Bug 5992."); +#endif // HAVE_IFPACK2_DEBUG + + // Build the local communicator (containing this process only). + RCP > localComm; +#ifdef HAVE_MPI + localComm = rcp (new Teuchos::MpiComm (MPI_COMM_SELF)); +#else + localComm = rcp (new Teuchos::SerialComm ()); +#endif // HAVE_MPI + //All 4 maps are the same for a local square operator. + localMap_ = rcp (new map_type (numLocalRows, 0, localComm, Tpetra::GloballyDistributed)); + //Create the inner filtered matrix. + auto crsParams = rcp(new Teuchos::ParameterList); + crsParams->set("sorted", true); + //NOTE: this is the fastest way to construct A_ - it's created as fillComplete, + //and no communication needs to be done since localMap_ uses a local comm. + //It does need to copy the whole local matrix to host when DualViews are constructed + //(only an issue with non-UVM GPU backends) but this is just unavoidable when creating a Tpetra::CrsMatrix. + //It also needs to compute local constants (maxNumRowEntries) but this should be a + //cheap operation relative to what this constructor already did. + A_ = rcp(new crs_matrix_type(localMap_, localMap_, localMatrix, crsParams)); +} + +template +AdditiveSchwarzFilter::~AdditiveSchwarzFilter () {} + +template +void AdditiveSchwarzFilter::updateMatrixValues() +{ + //Get the local matrix back from A_ + auto localMatrix = A_->getLocalMatrixDevice(); + //Copy new values from A_unfiltered to the local matrix, and then reconstruct A_. + fillLocalMatrix(localMatrix); + A_->setAllValues(localMatrix.graph.row_map, localMatrix.graph.entries, localMatrix.values); +} + +template +Teuchos::RCP::crs_matrix_type> +AdditiveSchwarzFilter::getFilteredMatrix() const +{ + return A_; +} + +template +void AdditiveSchwarzFilter::fillLocalMatrix(typename AdditiveSchwarzFilter::local_matrix_type localMatrix) +{ + auto localRowptrs = localMatrix.graph.row_map; + auto localEntries = localMatrix.graph.entries; + auto localValues = localMatrix.values; + auto permView = perm_.view_device(); + auto reversepermView = reverseperm_.view_device(); + local_matrix_type A_local, A_halo; + bool haveHalo = !Teuchos::rcp_dynamic_cast(A_unfiltered_).is_null(); + if(haveHalo) + { + auto A_overlapping = Teuchos::rcp_dynamic_cast(A_unfiltered_); + A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice(); + A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice(); + } + else + { + auto A_crs = Teuchos::rcp_dynamic_cast(A_unfiltered_); + A_local = A_crs->getLocalMatrixDevice(); + //leave A_halo empty in this case + } + local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows(); + //Fill entries and values + Kokkos::parallel_for(policy_type(0, localMatrix.numRows()), + KOKKOS_LAMBDA(local_ordinal_type i) + { + //Count the entries that will be in filtered row i. + //This means entries which correspond to local, non-singleton rows/columns. + size_type outRowStart = localRowptrs(i); + local_ordinal_type insertPos = 0; + local_ordinal_type origRow = reversepermView(i); + if(origRow < A_local.numRows()) + { + //row i is in original local matrix + size_type rowBegin = A_local.graph.row_map(origRow); + size_type rowEnd = A_local.graph.row_map(origRow + 1); + for(size_type j = rowBegin; j < rowEnd; j++) + { + local_ordinal_type entry = A_local.graph.entries(j); + if(entry < totalLocalRows) + { + auto newCol = permView(entry); + if(newCol != -1) + { + localEntries(outRowStart + insertPos) = newCol; + localValues(outRowStart + insertPos) = A_local.values(j); + insertPos++; + } + } + } + } + else + { + //row i is in halo + local_ordinal_type row = origRow - A_local.numRows(); + size_type rowBegin = A_halo.graph.row_map(row); + size_type rowEnd = A_halo.graph.row_map(row + 1); + for(size_type j = rowBegin; j < rowEnd; j++) + { + local_ordinal_type entry = A_halo.graph.entries(j); + if(entry < totalLocalRows) + { + auto newCol = permView(entry); + if(newCol != -1) + { + localEntries(outRowStart + insertPos) = newCol; + localValues(outRowStart + insertPos) = A_halo.values(j); + insertPos++; + } + } + } + } + }); + //Sort the matrix, since the reordering can shuffle the entries into any order. + KokkosKernels::sort_crs_matrix(localMatrix); +} + +template +Teuchos::RCP > AdditiveSchwarzFilter::getComm() const +{ + return localMap_->getComm(); +} + +template +Teuchos::RCP::map_type> +AdditiveSchwarzFilter::getRowMap() const +{ + return localMap_; +} + +template +Teuchos::RCP::map_type> +AdditiveSchwarzFilter::getColMap() const +{ + return localMap_; +} + +template +Teuchos::RCP::map_type> +AdditiveSchwarzFilter::getDomainMap() const +{ + return localMap_; +} + +template +Teuchos::RCP::map_type> +AdditiveSchwarzFilter::getRangeMap() const +{ + return localMap_; +} + +template +Teuchos::RCP > +AdditiveSchwarzFilter::getGraph() const +{ + throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter: does not support getGraph."); +} + +template +global_size_t AdditiveSchwarzFilter::getGlobalNumRows() const +{ + return A_->getGlobalNumRows(); +} + +template +global_size_t AdditiveSchwarzFilter::getGlobalNumCols() const +{ + return A_->getGlobalNumCols(); +} + +template +size_t AdditiveSchwarzFilter::getLocalNumRows() const +{ + return A_->getLocalNumRows(); +} + +template +size_t AdditiveSchwarzFilter::getLocalNumCols() const +{ + return A_->getLocalNumCols(); +} + +template +typename MatrixType::global_ordinal_type AdditiveSchwarzFilter::getIndexBase() const +{ + return A_->getIndexBase(); +} + +template +global_size_t AdditiveSchwarzFilter::getGlobalNumEntries() const +{ + return getLocalNumEntries(); +} + +template +size_t AdditiveSchwarzFilter::getLocalNumEntries() const +{ + return A_->getLocalNumEntries(); +} + +template +size_t AdditiveSchwarzFilter:: +getNumEntriesInGlobalRow (global_ordinal_type globalRow) const +{ + return A_->getNumEntriesInGlobalRow(globalRow); +} + +template +size_t AdditiveSchwarzFilter:: +getNumEntriesInLocalRow (local_ordinal_type localRow) const +{ + return A_->getNumEntriesInLocalRow(localRow); +} + +template +size_t AdditiveSchwarzFilter::getGlobalMaxNumRowEntries() const +{ + //Use A_unfiltered_ to get a valid upper bound for this. + //This lets us support this function without computing global constants on A_. + return A_unfiltered_->getGlobalMaxNumRowEntries(); +} + +template +size_t AdditiveSchwarzFilter::getLocalMaxNumRowEntries() const +{ + //Use A_unfiltered_ to get a valid upper bound for this + return A_unfiltered_->getLocalMaxNumRowEntries(); +} + + +template +bool AdditiveSchwarzFilter::hasColMap() const +{ + return true; +} + + +template +bool AdditiveSchwarzFilter::isLocallyIndexed() const +{ + return true; +} + + +template +bool AdditiveSchwarzFilter::isGloballyIndexed() const +{ + return false; +} + + +template +bool AdditiveSchwarzFilter::isFillComplete() const +{ + return true; +} + + +template +void AdditiveSchwarzFilter:: + getGlobalRowCopy (global_ordinal_type globalRow, + nonconst_global_inds_host_view_type &globalInd, + nonconst_values_host_view_type &val, + size_t& numEntries) const +{ + throw std::runtime_error("Ifpack2::Details::AdditiveSchwarzFilter does not implement getGlobalRowCopy."); +} + +template +void AdditiveSchwarzFilter:: +getLocalRowCopy (local_ordinal_type LocalRow, + nonconst_local_inds_host_view_type &Indices, + nonconst_values_host_view_type &Values, + size_t& NumEntries) const + +{ + A_->getLocalRowCopy(LocalRow, Indices, Values, NumEntries); +} + +template +void AdditiveSchwarzFilter::getGlobalRowView(global_ordinal_type /* GlobalRow */, + global_inds_host_view_type &/*indices*/, + values_host_view_type &/*values*/) const +{ + throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter: does not support getGlobalRowView."); +} + +template +void AdditiveSchwarzFilter::getLocalRowView(local_ordinal_type LocalRow, + local_inds_host_view_type & indices, + values_host_view_type & values) const +{ + A_->getLocalRowView(LocalRow, indices, values); +} + +template +void AdditiveSchwarzFilter:: +getLocalDiagCopy (Tpetra::Vector &diag) const +{ + // This is somewhat dubious as to how the maps match. + A_->getLocalDiagCopy(diag); +} + +template +void AdditiveSchwarzFilter::leftScale(const Tpetra::Vector& /* x */) +{ + throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter does not support leftScale."); +} + +template +void AdditiveSchwarzFilter::rightScale(const Tpetra::Vector& /* x */) +{ + throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter does not support rightScale."); +} + +template +void AdditiveSchwarzFilter:: +apply (const Tpetra::MultiVector &X, + Tpetra::MultiVector &Y, + Teuchos::ETransp mode, + scalar_type alpha, + scalar_type beta) const +{ + A_->apply(X, Y, mode, alpha, beta); +} + +template +void AdditiveSchwarzFilter:: +CreateReducedProblem( + const Tpetra::MultiVector& OverlappingB, + Tpetra::MultiVector& OverlappingY, + Tpetra::MultiVector& ReducedReorderedB) const +{ + //NOTE: the three vectors here are always constructed within AdditiveSchwarz and are not subviews, + //so they are all constant stride (this avoids a lot of boilerplate with whichVectors) + TEUCHOS_TEST_FOR_EXCEPTION(!OverlappingB.isConstantStride() || !OverlappingY.isConstantStride() || !ReducedReorderedB.isConstantStride(), + std::logic_error, "Ifpack2::AdditiveSchwarzFilter::CreateReducedProblem ERROR: One of the input MultiVectors is not constant stride."); + local_ordinal_type numVecs = OverlappingB.getNumVectors(); + auto b = OverlappingB.getLocalViewDevice(Tpetra::Access::ReadOnly); + auto reducedB = ReducedReorderedB.getLocalViewDevice(Tpetra::Access::OverwriteAll); + auto singletons = singletons_.view_device(); + auto singletonDiags = singletonDiagonals_.view_device(); + auto revperm = reverseperm_.view_device(); + //First, solve the singletons. + { + auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite); + Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) numSingletons_, numVecs}), + KOKKOS_LAMBDA(local_ordinal_type singletonIndex, local_ordinal_type col) + { + local_ordinal_type row = singletons(singletonIndex); + y(row, col) = b(row, col) / singletonDiags(singletonIndex); + }); + } + //Next, permute OverlappingB to ReducedReorderedB. + Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedB.extent(0), numVecs}), + KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col) + { + reducedB(row, col) = b(revperm(row), col); + }); + //Finally, if there are singletons, eliminate the matrix entries which are in singleton columns ("eliminate" here means update reducedB like in row reduction) + //This could be done more efficiently by storing a separate matrix of just the singleton columns and permuted non-singleton rows, but this adds a lot of complexity. + //Instead, just apply() the unfiltered matrix to OverlappingY (which is 0, except for singletons), and then permute the result of that and subtract it from reducedB. + if(numSingletons_) + { + mv_type singletonUpdates(A_unfiltered_->getRowMap(), numVecs, false); + A_unfiltered_->apply(OverlappingY, singletonUpdates); + auto updatesView = singletonUpdates.getLocalViewDevice(Tpetra::Access::ReadOnly); + auto revperm = reverseperm_.view_device(); + Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedB.extent(0), numVecs}), + KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col) + { + local_ordinal_type origRow = revperm(row); + reducedB(row, col) -= updatesView(origRow, col); + }); + } +} + +template +void AdditiveSchwarzFilter::UpdateLHS( + const Tpetra::MultiVector& ReducedReorderedY, + Tpetra::MultiVector& OverlappingY) const +{ + //NOTE: both vectors here are always constructed within AdditiveSchwarz and are not subviews, + //so they are all constant stride (this avoids a lot of boilerplate with whichVectors) + TEUCHOS_TEST_FOR_EXCEPTION(!ReducedReorderedY.isConstantStride() || !OverlappingY.isConstantStride(), + std::logic_error, "Ifpack2::AdditiveSchwarzFilter::UpdateLHS ERROR: One of the input MultiVectors is not constant stride."); + auto reducedY = ReducedReorderedY.getLocalViewDevice(Tpetra::Access::ReadOnly); + auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite); + auto revperm = reverseperm_.view_device(); + Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedY.extent(0), (local_ordinal_type) reducedY.extent(1)}), + KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col) + { + y(revperm(row), col) = reducedY(row, col); + }); +} + +template +bool AdditiveSchwarzFilter::hasTransposeApply() const +{ + return true; +} + + +template +bool AdditiveSchwarzFilter::supportsRowViews() const +{ + return true; +} + +template +typename AdditiveSchwarzFilter::mag_type AdditiveSchwarzFilter::getFrobeniusNorm() const +{ + // Reordering doesn't change the Frobenius norm. + return A_->getFrobeniusNorm (); +} + +}} // namespace Ifpack2::Details + +#define IFPACK2_DETAILS_ADDITIVESCHWARZFILTER_INSTANT(S,LO,GO,N) \ + template class Ifpack2::Details::AdditiveSchwarzFilter< Tpetra::RowMatrix >; + +#endif + diff --git a/packages/ifpack2/src/Ifpack2_IlukGraph.hpp b/packages/ifpack2/src/Ifpack2_IlukGraph.hpp index cff61d43f41f..9f715f4f8a6b 100644 --- a/packages/ifpack2/src/Ifpack2_IlukGraph.hpp +++ b/packages/ifpack2/src/Ifpack2_IlukGraph.hpp @@ -281,6 +281,7 @@ void IlukGraph::initialize() // Get Maximum Row length const int MaxNumIndices = OverlapGraph_->getLocalMaxNumRowEntries (); + std::cout << "IlukGraph: OverlapGraph says max num row entries is " << MaxNumIndices << '\n'; // FIXME (mfh 23 Dec 2013) Use size_t or whatever // getLocalNumElements() returns, instead of ptrdiff_t. diff --git a/packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_decl.hpp b/packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_decl.hpp index 00433568cf6b..167a464a1623 100644 --- a/packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_decl.hpp +++ b/packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_decl.hpp @@ -75,6 +75,8 @@ class OverlappingRowMatrix : using row_matrix_type = Tpetra::RowMatrix; + using crs_matrix_type = Tpetra::CrsMatrix; static_assert(std::is_same::value, "Ifpack2::OverlappingRowMatrix: The template parameter MatrixType must be a Tpetra::RowMatrix specialization. Please don't use Tpetra::CrsMatrix (a subclass of Tpetra::RowMatrix) here anymore. The constructor can take either a RowMatrix or a CrsMatrix just fine."); @@ -341,18 +343,19 @@ class OverlappingRowMatrix : void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const; - virtual Teuchos::RCP getUnderlyingMatrix() const; + Teuchos::RCP getUnderlyingMatrix() const; - Teuchos::RCP getExtMatrix() const; + Teuchos::RCP getExtMatrix() const; Teuchos::ArrayView getExtHaloStarts() const; + void doExtImport(); + private: typedef Tpetra::Map map_type; typedef Tpetra::Import import_type; typedef Tpetra::Export export_type; typedef Tpetra::RowGraph row_graph_type; - typedef Tpetra::CrsMatrix crs_matrix_type; typedef Tpetra::Vector vector_type; //! The input matrix to the constructor. @@ -369,7 +372,7 @@ class OverlappingRowMatrix : Teuchos::RCP Importer_; //! The matrix containing only the overlap rows. - Teuchos::RCP ExtMatrix_; + Teuchos::RCP ExtMatrix_; Teuchos::RCP ExtMap_; Teuchos::RCP ExtImporter_; Teuchos::Array ExtHaloStarts_; diff --git a/packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_def.hpp b/packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_def.hpp index f24c7ce5d4e3..86a1175e3e36 100644 --- a/packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_def.hpp +++ b/packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_def.hpp @@ -175,11 +175,9 @@ OverlappingRowMatrix (const Teuchos::RCP& A, ExtImporter_ = rcp (new import_type (A_->getRowMap (), ExtMap_)); { - RCP ExtMatrix_nc = - rcp (new crs_matrix_type (ExtMap_, ColMap_, 0)); - ExtMatrix_nc->doImport (*A_, *ExtImporter_, Tpetra::INSERT); - ExtMatrix_nc->fillComplete (A_->getDomainMap (), RowMap_); - ExtMatrix_ = ExtMatrix_nc; // we only need the const version after here + ExtMatrix_ = rcp (new crs_matrix_type (ExtMap_, ColMap_, 0)); + ExtMatrix_->doImport (*A_, *ExtImporter_, Tpetra::INSERT); + ExtMatrix_->fillComplete (A_->getDomainMap (), RowMap_); } // fix indices for overlapping matrix @@ -819,14 +817,14 @@ void OverlappingRowMatrix::describe(Teuchos::FancyOStream &out, } template -Teuchos::RCP > +Teuchos::RCP > OverlappingRowMatrix::getUnderlyingMatrix() const { return A_; } template -Teuchos::RCP > +Teuchos::RCP > OverlappingRowMatrix::getExtMatrix() const { return ExtMatrix_; @@ -838,6 +836,13 @@ Teuchos::ArrayView OverlappingRowMatrix::getExtHaloSta return ExtHaloStarts_(); } +template +void OverlappingRowMatrix::doExtImport() +{ + ExtMatrix_ = rcp (new crs_matrix_type (ExtMap_, ColMap_, 0)); + ExtMatrix_->doImport (*A_, *ExtImporter_, Tpetra::INSERT); + ExtMatrix_->fillComplete (A_->getDomainMap (), RowMap_); +} } // namespace Ifpack2 diff --git a/packages/ifpack2/test/unit_tests/Ifpack2_UnitTestAdditiveSchwarz.cpp b/packages/ifpack2/test/unit_tests/Ifpack2_UnitTestAdditiveSchwarz.cpp index 2d50702d3203..b80f6600229c 100644 --- a/packages/ifpack2/test/unit_tests/Ifpack2_UnitTestAdditiveSchwarz.cpp +++ b/packages/ifpack2/test/unit_tests/Ifpack2_UnitTestAdditiveSchwarz.cpp @@ -133,9 +133,11 @@ TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL(Ifpack2AdditiveSchwarz, Test0, Scalar, LocalOr params.set ("schwarz: combine mode", "Zero"); #if defined(HAVE_IFPACK2_XPETRA) && defined(HAVE_IFPACK2_ZOLTAN2) + std::cout << "Test0: Enabling reordering!\n"; params.set ("schwarz: use reordering", true); params.set ("schwarz: reordering list", zlist); #else + std::cout << "Test0: NOT enabling reordering!\n"; params.set ("schwarz: use reordering", false); #endif diff --git a/packages/ifpack2/test/unit_tests/Ifpack2_UnitTestLocalSparseTriangularSolver.cpp b/packages/ifpack2/test/unit_tests/Ifpack2_UnitTestLocalSparseTriangularSolver.cpp index f15c6d2f0d09..7a8c12d4087d 100644 --- a/packages/ifpack2/test/unit_tests/Ifpack2_UnitTestLocalSparseTriangularSolver.cpp +++ b/packages/ifpack2/test/unit_tests/Ifpack2_UnitTestLocalSparseTriangularSolver.cpp @@ -84,15 +84,8 @@ localSolve (Tpetra::MultiVector< using Teuchos::CONJ_TRANS; using Teuchos::NO_TRANS; using Teuchos::TRANS; - using MV = Tpetra::MultiVector< - typename CrsMatrixType::scalar_type, - typename CrsMatrixType::local_ordinal_type, - typename CrsMatrixType::global_ordinal_type, - typename CrsMatrixType::node_type>; using scalar_type = typename CrsMatrixType::scalar_type; using STS = Teuchos::ScalarTraits; - using device_type = typename CrsMatrixType::device_type; - using dev_memory_space = typename device_type::memory_space; const char prefix[] = "localSolve: "; TEUCHOS_TEST_FOR_EXCEPTION @@ -999,12 +992,6 @@ void testArrowMatrix (bool& success, Teuchos::FancyOStream& out) L,U,out); if(!gblSuccess) return; - typedef typename crs_matrix_type::local_graph_device_type local_graph_type; - typedef typename crs_matrix_type::local_matrix_device_type local_matrix_type; - typedef typename local_matrix_type::row_map_type::non_const_type row_offsets_type; - typedef typename local_graph_type::entries_type::non_const_type col_inds_type; - typedef typename local_matrix_type::values_type::non_const_type values_type; - typedef typename crs_matrix_type::local_inds_host_view_type const_local_inds_type; typedef typename crs_matrix_type::values_host_view_type const_values_type; diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp index 79da3ba2edae..04cec10c9bce 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp @@ -1326,7 +1326,7 @@ namespace Tpetra { /// /// \pre columnIndices are sorted within rows /// \pre hasColMap() == true - /// \pre rowPointers.size() != getLocalNumRows()+1 + /// \pre rowPointers.size() == getLocalNumRows()+1 /// \pre No insert routines have been called. /// /// \warning This method is intended for expert developer use @@ -1339,7 +1339,7 @@ namespace Tpetra { /// /// \pre columnIndices are sorted within rows /// \pre hasColMap() == true - /// \pre rowPointers.size() != getLocalNumRows()+1 + /// \pre rowPointers.size() == getLocalNumRows()+1 /// \pre No insert routines have been called. /// /// \warning This method is intended for expert developer use diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index e108e6134fcf..b7a7792e7a68 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -2720,6 +2720,7 @@ namespace Tpetra { } } + /* // FIXME (mfh 07 Aug 2014) We need to relax this restriction, // since the future model will be allocation at construction, not // lazy allocation on first insert. @@ -2727,6 +2728,7 @@ namespace Tpetra { ((this->lclIndsUnpacked_wdv.extent (0) != 0 || this->gblInds_wdv.extent (0) != 0), std::runtime_error, "You may not call this method if 1-D data " "structures are already allocated."); + */ indicesAreAllocated_ = true; indicesAreLocal_ = true;