diff --git a/packages/ifpack2/src/CMakeLists.txt b/packages/ifpack2/src/CMakeLists.txt index bbb81af997d3..d16b67b28e6c 100644 --- a/packages/ifpack2/src/CMakeLists.txt +++ b/packages/ifpack2/src/CMakeLists.txt @@ -248,6 +248,7 @@ IF(Ifpack2_ENABLE_EXPLICIT_INSTANTIATION) SparsityFilter ContainerFactory TriDiContainer + Details::AdditiveSchwarzFilter Details::Chebyshev Details::ChebyshevKernel Details::DenseSolver diff --git a/packages/ifpack2/src/Ifpack2_AdditiveSchwarz_decl.hpp b/packages/ifpack2/src/Ifpack2_AdditiveSchwarz_decl.hpp index 9d75ee4fa59c..8c0a135e5451 100644 --- a/packages/ifpack2/src/Ifpack2_AdditiveSchwarz_decl.hpp +++ b/packages/ifpack2/src/Ifpack2_AdditiveSchwarz_decl.hpp @@ -56,6 +56,7 @@ #include "Ifpack2_Preconditioner.hpp" #include "Ifpack2_Details_CanChangeMatrix.hpp" #include "Ifpack2_Details_NestedPreconditioner.hpp" +#include "Ifpack2_OverlappingRowMatrix.hpp" #include "Tpetra_Map.hpp" #include "Tpetra_MultiVector.hpp" #include "Tpetra_RowMatrix.hpp" @@ -329,6 +330,12 @@ class AdditiveSchwarz : using row_matrix_type = Tpetra::RowMatrix; + + //! The Tpetra::CrsMatrix specialization that is a subclass of MatrixType. + using crs_matrix_type = + Tpetra::CrsMatrix; + //@} // \name Constructors and destructor //@{ @@ -774,7 +781,7 @@ class AdditiveSchwarz : /// \brief The overlapping matrix. /// /// If nonnull, this is an instance of OverlappingRowMatrix. - Teuchos::RCP OverlappingMatrix_; + Teuchos::RCP> OverlappingMatrix_; //! The reordered matrix. Teuchos::RCP ReorderedLocalizedMatrix_; diff --git a/packages/ifpack2/src/Ifpack2_AdditiveSchwarz_def.hpp b/packages/ifpack2/src/Ifpack2_AdditiveSchwarz_def.hpp index 58beacd301a7..91471e0f96d1 100644 --- a/packages/ifpack2/src/Ifpack2_AdditiveSchwarz_def.hpp +++ b/packages/ifpack2/src/Ifpack2_AdditiveSchwarz_def.hpp @@ -69,11 +69,11 @@ #endif #include "Ifpack2_Details_CanChangeMatrix.hpp" -#include "Ifpack2_LocalFilter.hpp" -#include "Ifpack2_OverlappingRowMatrix.hpp" #include "Ifpack2_Parameters.hpp" +#include "Ifpack2_LocalFilter.hpp" #include "Ifpack2_ReorderFilter.hpp" #include "Ifpack2_SingletonFilter.hpp" +#include "Ifpack2_Details_AdditiveSchwarzFilter.hpp" #ifdef HAVE_MPI #include "Teuchos_DefaultMpiComm.hpp" @@ -463,24 +463,14 @@ apply (const Tpetra::MultiVector overlap_mat_type; - RCP overlapMatrix; + // do communication if necessary if (IsOverlapping_) { - overlapMatrix = rcp_dynamic_cast (OverlappingMatrix_); TEUCHOS_TEST_FOR_EXCEPTION - (overlapMatrix.is_null (), std::logic_error, prefix << + (OverlappingMatrix_.is_null (), std::logic_error, prefix << "IsOverlapping_ is true, but OverlappingMatrix_, while nonnull, is " "not an OverlappingRowMatrix. Please report this " "bug to the Ifpack2 developers."); - } - - // do communication if necessary - if (IsOverlapping_) { - TEUCHOS_TEST_FOR_EXCEPTION - (overlapMatrix.is_null (), std::logic_error, prefix - << "overlapMatrix is null when it shouldn't be. " - "Please report this bug to the Ifpack2 developers."); - overlapMatrix->importMultiVector (*R, *OverlappingB, Tpetra::INSERT); + OverlappingMatrix_->importMultiVector (*R, *OverlappingB, Tpetra::INSERT); //JJH We don't need to import the solution Y we are always solving AY=R with initial guess zero //if (ZeroStartingSolution_ == false) @@ -556,10 +546,10 @@ apply (const Tpetra::MultiVectorexportMultiVector (*OverlappingY, *C, CombineMode_); + OverlappingMatrix_->exportMultiVector (*OverlappingY, *C, CombineMode_); } else { // mfh 16 Apr 2014: Make a view of Y with the same Map as @@ -630,63 +620,83 @@ localApply (MV& OverlappingB, MV& OverlappingY) const using Teuchos::rcp_dynamic_cast; const size_t numVectors = OverlappingB.getNumVectors (); - if (FilterSingletons_) { - // process singleton filter - MV ReducedB (SingletonMatrix_->getRowMap (), numVectors); - MV ReducedY (SingletonMatrix_->getRowMap (), numVectors); - RCP > singletonFilter = - rcp_dynamic_cast > (SingletonMatrix_); - TEUCHOS_TEST_FOR_EXCEPTION - (! SingletonMatrix_.is_null () && singletonFilter.is_null (), - std::logic_error, "Ifpack2::AdditiveSchwarz::localApply: " - "SingletonFilter_ is nonnull but is not a SingletonFilter" - ". This should never happen. Please report this bug " - "to the Ifpack2 developers."); - singletonFilter->SolveSingletons (OverlappingB, OverlappingY); - singletonFilter->CreateReducedRHS (OverlappingY, OverlappingB, ReducedB); - - // process reordering - if (! UseReordering_) { - Inverse_->solve (ReducedY, ReducedB); - } - else { - RCP > rf = - rcp_dynamic_cast > (ReorderedLocalizedMatrix_); + auto additiveSchwarzFilter = rcp_dynamic_cast>(innerMatrix_); + if(additiveSchwarzFilter) + { + //Create the reduced system innerMatrix_ * ReducedY = ReducedB. + //This effectively fuses 3 tasks: + // -SingletonFilter::SolveSingletons (solve entries of OverlappingY corresponding to singletons) + // -SingletonFilter::CreateReducedRHS (fill ReducedReorderedB from OverlappingB, with entries in singleton columns eliminated) + // -ReorderFilter::permuteOriginalToReordered (apply permutation to ReducedReorderedB) + MV ReducedReorderedB (additiveSchwarzFilter->getRowMap(), numVectors); + MV ReducedReorderedY (additiveSchwarzFilter->getRowMap(), numVectors); + additiveSchwarzFilter->CreateReducedProblem(OverlappingB, OverlappingY, ReducedReorderedB); + //Apply inner solver + Inverse_->solve (ReducedReorderedY, ReducedReorderedB); + //Scatter ReducedY back to non-singleton rows of OverlappingY, according to the reordering. + additiveSchwarzFilter->UpdateLHS(ReducedReorderedY, OverlappingY); + } + else + { + if (FilterSingletons_) { + // process singleton filter + MV ReducedB (SingletonMatrix_->getRowMap (), numVectors); + MV ReducedY (SingletonMatrix_->getRowMap (), numVectors); + + RCP > singletonFilter = + rcp_dynamic_cast > (SingletonMatrix_); TEUCHOS_TEST_FOR_EXCEPTION - (! ReorderedLocalizedMatrix_.is_null () && rf.is_null (), std::logic_error, - "Ifpack2::AdditiveSchwarz::localApply: ReorderedLocalizedMatrix_ is " - "nonnull but is not a ReorderFilter. This should " - "never happen. Please report this bug to the Ifpack2 developers."); - MV ReorderedB (ReducedB, Teuchos::Copy); - MV ReorderedY (ReducedY, Teuchos::Copy); - rf->permuteOriginalToReordered (ReducedB, ReorderedB); - Inverse_->solve (ReorderedY, ReorderedB); - rf->permuteReorderedToOriginal (ReorderedY, ReducedY); - } + (! SingletonMatrix_.is_null () && singletonFilter.is_null (), + std::logic_error, "Ifpack2::AdditiveSchwarz::localApply: " + "SingletonFilter_ is nonnull but is not a SingletonFilter" + ". This should never happen. Please report this bug " + "to the Ifpack2 developers."); + singletonFilter->SolveSingletons (OverlappingB, OverlappingY); + singletonFilter->CreateReducedRHS (OverlappingY, OverlappingB, ReducedB); + + // process reordering + if (! UseReordering_) { + Inverse_->solve (ReducedY, ReducedB); + } + else { + RCP > rf = + rcp_dynamic_cast > (ReorderedLocalizedMatrix_); + TEUCHOS_TEST_FOR_EXCEPTION + (! ReorderedLocalizedMatrix_.is_null () && rf.is_null (), std::logic_error, + "Ifpack2::AdditiveSchwarz::localApply: ReorderedLocalizedMatrix_ is " + "nonnull but is not a ReorderFilter. This should " + "never happen. Please report this bug to the Ifpack2 developers."); + MV ReorderedB (ReducedB, Teuchos::Copy); + MV ReorderedY (ReducedY, Teuchos::Copy); + rf->permuteOriginalToReordered (ReducedB, ReorderedB); + Inverse_->solve (ReorderedY, ReorderedB); + rf->permuteReorderedToOriginal (ReorderedY, ReducedY); + } - // finish up with singletons - singletonFilter->UpdateLHS (ReducedY, OverlappingY); - } - else { - // process reordering - if (! UseReordering_) { - Inverse_->solve (OverlappingY, OverlappingB); + // finish up with singletons + singletonFilter->UpdateLHS (ReducedY, OverlappingY); } else { - MV ReorderedB (OverlappingB, Teuchos::Copy); - MV ReorderedY (OverlappingY, Teuchos::Copy); + // process reordering + if (! UseReordering_) { + Inverse_->solve (OverlappingY, OverlappingB); + } + else { + MV ReorderedB (OverlappingB, Teuchos::Copy); + MV ReorderedY (OverlappingY, Teuchos::Copy); - RCP > rf = - rcp_dynamic_cast > (ReorderedLocalizedMatrix_); - TEUCHOS_TEST_FOR_EXCEPTION - (! ReorderedLocalizedMatrix_.is_null () && rf.is_null (), std::logic_error, - "Ifpack2::AdditiveSchwarz::localApply: ReorderedLocalizedMatrix_ is " - "nonnull but is not a ReorderFilter. This should " - "never happen. Please report this bug to the Ifpack2 developers."); - rf->permuteOriginalToReordered (OverlappingB, ReorderedB); - Inverse_->solve (ReorderedY, ReorderedB); - rf->permuteReorderedToOriginal (ReorderedY, OverlappingY); + RCP > rf = + rcp_dynamic_cast > (ReorderedLocalizedMatrix_); + TEUCHOS_TEST_FOR_EXCEPTION + (! ReorderedLocalizedMatrix_.is_null () && rf.is_null (), std::logic_error, + "Ifpack2::AdditiveSchwarz::localApply: ReorderedLocalizedMatrix_ is " + "nonnull but is not a ReorderFilter. This should " + "never happen. Please report this bug to the Ifpack2 developers."); + rf->permuteOriginalToReordered (OverlappingB, ReorderedB); + Inverse_->solve (ReorderedY, ReorderedB); + rf->permuteReorderedToOriginal (ReorderedY, OverlappingY); + } } } } @@ -1015,7 +1025,6 @@ void AdditiveSchwarz::initialize () InitializeTime_ += (timer->wallTime() - startTime); } - template bool AdditiveSchwarz::isInitialized () const { @@ -1056,6 +1065,23 @@ void AdditiveSchwarz::compute () } double startTime = timer->wallTime(); + // compute () assumes that the values of Matrix_ (aka A) have changed. + // If this has overlap, do an import from the input matrix to the halo. + if (IsOverlapping_) { + OverlappingMatrix_->doExtImport(); + } + // At this point, either Matrix_ or OverlappingMatrix_ (depending on whether this is overlapping) + // has new values and unchanged structure. If we are using AdditiveSchwarzFilter, update the local matrix. + // + if(auto asf = Teuchos::rcp_dynamic_cast>(innerMatrix_)) + { + //NOTE: if this compute() call comes right after the initialize() with no intervening matrix changes, this call is redundant. + //initialize() already filled the local matrix. However, we have no way to tell if this is the case. + asf->updateMatrixValues(); + } + //Now, whether the Inverse_'s matrix is the AdditiveSchwarzFilter's local matrix or simply Matrix_/OverlappingMatrix_, + //it will be able to see the new values and update itself accordingly. + { // Start timing here. TimeMonitor timeMon (*timer); @@ -1317,114 +1343,212 @@ void AdditiveSchwarz::setup () "initialize: The matrix to precondition is null. You must either pass " "a nonnull matrix to the constructor, or call setMatrix() with a nonnull " "input, before you may call this method."); + + RCP innerPrecMatrix; - // Localized version of Matrix_ or OverlappingMatrix_. - RCP LocalizedMatrix; + //If the matrix is a CrsMatrix or OverlappingRowMatrix, use the high-performance + //AdditiveSchwarzFilter. Otherwise, use composition of Reordered/Singleton/LocalFilter. + auto matrixCrs = rcp_dynamic_cast(Matrix_); + if(!OverlappingMatrix_.is_null() || !matrixCrs.is_null()) + { + ArrayRCP perm; + ArrayRCP revperm; + if (UseReordering_) { +#if defined(HAVE_IFPACK2_XPETRA) && defined(HAVE_IFPACK2_ZOLTAN2) + // Unlike Ifpack, Zoltan2 does all the dirty work here. + Teuchos::ParameterList zlist = List_.sublist ("schwarz: reordering list"); + ReorderingAlgorithm_ = zlist.get ("order_method", "rcm"); + + if(ReorderingAlgorithm_ == "user") { + // User-provided reordering + perm = zlist.get >("user ordering"); + revperm = zlist.get >("user reverse ordering"); + } + else { + // Zoltan2 reordering + typedef Tpetra::RowGraph + row_graph_type; + typedef Zoltan2::TpetraRowGraphAdapter z2_adapter_type; + auto constActiveGraph = Teuchos::rcp_const_cast( + IsOverlapping_ ? OverlappingMatrix_->getGraph() : Matrix_->getGraph()); + z2_adapter_type Zoltan2Graph (constActiveGraph); + + typedef Zoltan2::OrderingProblem ordering_problem_type; +#ifdef HAVE_MPI + // Grab the MPI Communicator and build the ordering problem with that + MPI_Comm myRawComm; + + RCP > mpicomm = + rcp_dynamic_cast > (Matrix_->getComm ()); + if (mpicomm == Teuchos::null) { + myRawComm = MPI_COMM_SELF; + } else { + myRawComm = * (mpicomm->getRawMpiComm ()); + } + ordering_problem_type MyOrderingProblem (&Zoltan2Graph, &zlist, myRawComm); +#else + ordering_problem_type MyOrderingProblem (&Zoltan2Graph, &zlist); +#endif + MyOrderingProblem.solve (); - // The "most current local matrix." At the end of this method, this - // will be handed off to the inner solver. - RCP ActiveMatrix; + { + typedef Zoltan2::LocalOrderingSolution + ordering_solution_type; - // Create localized matrix. - if (! OverlappingMatrix_.is_null ()) { - LocalizedMatrix = rcp (new LocalFilter (OverlappingMatrix_)); - } - else { - LocalizedMatrix = rcp (new LocalFilter (Matrix_)); - } + ordering_solution_type sol (*MyOrderingProblem.getLocalOrderingSolution()); - // Sanity check; I don't trust the logic above to have created LocalizedMatrix. - TEUCHOS_TEST_FOR_EXCEPTION( - LocalizedMatrix.is_null (), std::logic_error, - "Ifpack2::AdditiveSchwarz::setup: LocalizedMatrix is null, after the code " - "that claimed to have created it. This should never be the case. Please " - "report this bug to the Ifpack2 developers."); - - // Mark localized matrix as active - ActiveMatrix = LocalizedMatrix; - - // Singleton Filtering - if (FilterSingletons_) { - SingletonMatrix_ = rcp (new SingletonFilter (LocalizedMatrix)); - ActiveMatrix = SingletonMatrix_; + // perm[i] gives the where OLD index i shows up in the NEW + // ordering. revperm[i] gives the where NEW index i shows + // up in the OLD ordering. Note that perm is actually the + // "inverse permutation," in Zoltan2 terms. + perm = sol.getPermutationRCPConst (true); + revperm = sol.getPermutationRCPConst (); + } + } +#else + // This is a logic_error, not a runtime_error, because + // setParameters() should have excluded this case already. + TEUCHOS_TEST_FOR_EXCEPTION( + true, std::logic_error, "Ifpack2::AdditiveSchwarz::setup: " + "The Zoltan2 and Xpetra packages must be enabled in order " + "to support reordering."); +#endif + } + else + { + local_ordinal_type numLocalRows = OverlappingMatrix_.is_null() ? matrixCrs->getLocalNumRows() : OverlappingMatrix_->getLocalNumRows(); + //Use an identity ordering. + //TODO: create a non-permuted code path in AdditiveSchwarzFilter, in the case that neither + //reordering nor singleton filtering are enabled. In this situation it's like LocalFilter. + perm = ArrayRCP(numLocalRows); + revperm = ArrayRCP(numLocalRows); + for(local_ordinal_type i = 0; i < numLocalRows; i++) + { + perm[i] = i; + revperm[i] = i; + } + } + //Now, construct the filter + RCP> asf; + if(OverlappingMatrix_.is_null()) + asf = rcp(new Details::AdditiveSchwarzFilter(matrixCrs, perm, revperm, FilterSingletons_)); + else + asf = rcp(new Details::AdditiveSchwarzFilter(OverlappingMatrix_, perm, revperm, FilterSingletons_)); + innerMatrix_ = asf; + //The matrix for the inner preconditioner is a CrsMatrix + innerPrecMatrix = asf->getFilteredMatrix(); } + else + { + // Localized version of Matrix_ or OverlappingMatrix_. + RCP LocalizedMatrix; - // Do reordering - if (UseReordering_) { -#if defined(HAVE_IFPACK2_XPETRA) && defined(HAVE_IFPACK2_ZOLTAN2) - // Unlike Ifpack, Zoltan2 does all the dirty work here. - typedef ReorderFilter reorder_filter_type; - Teuchos::ParameterList zlist = List_.sublist ("schwarz: reordering list"); - ReorderingAlgorithm_ = zlist.get ("order_method", "rcm"); - - ArrayRCP perm; - ArrayRCP revperm; + // The "most current local matrix." At the end of this method, this + // will be handed off to the inner solver. + RCP ActiveMatrix; - if(ReorderingAlgorithm_ == "user") { - // User-provided reordering - perm = zlist.get >("user ordering"); - revperm = zlist.get >("user reverse ordering"); + // Create localized matrix. + if (! OverlappingMatrix_.is_null ()) { + LocalizedMatrix = rcp (new LocalFilter (OverlappingMatrix_)); } else { - // Zoltan2 reordering - typedef Tpetra::RowGraph - row_graph_type; - typedef Zoltan2::TpetraRowGraphAdapter z2_adapter_type; - RCP constActiveGraph = - Teuchos::rcp_const_cast(ActiveMatrix->getGraph()); - z2_adapter_type Zoltan2Graph (constActiveGraph); - - typedef Zoltan2::OrderingProblem ordering_problem_type; -#ifdef HAVE_MPI - // Grab the MPI Communicator and build the ordering problem with that - MPI_Comm myRawComm; + LocalizedMatrix = rcp (new LocalFilter (Matrix_)); + } - RCP > mpicomm = - rcp_dynamic_cast > (ActiveMatrix->getComm ()); - if (mpicomm == Teuchos::null) { - myRawComm = MPI_COMM_SELF; - } else { - myRawComm = * (mpicomm->getRawMpiComm ()); + // Sanity check; I don't trust the logic above to have created LocalizedMatrix. + TEUCHOS_TEST_FOR_EXCEPTION( + LocalizedMatrix.is_null (), std::logic_error, + "Ifpack2::AdditiveSchwarz::setup: LocalizedMatrix is null, after the code " + "that claimed to have created it. This should never be the case. Please " + "report this bug to the Ifpack2 developers."); + + // Mark localized matrix as active + ActiveMatrix = LocalizedMatrix; + + // Singleton Filtering + if (FilterSingletons_) { + SingletonMatrix_ = rcp (new SingletonFilter (LocalizedMatrix)); + ActiveMatrix = SingletonMatrix_; + } + + // Do reordering + if (UseReordering_) { +#if defined(HAVE_IFPACK2_XPETRA) && defined(HAVE_IFPACK2_ZOLTAN2) + // Unlike Ifpack, Zoltan2 does all the dirty work here. + typedef ReorderFilter reorder_filter_type; + Teuchos::ParameterList zlist = List_.sublist ("schwarz: reordering list"); + ReorderingAlgorithm_ = zlist.get ("order_method", "rcm"); + + ArrayRCP perm; + ArrayRCP revperm; + + if(ReorderingAlgorithm_ == "user") { + // User-provided reordering + perm = zlist.get >("user ordering"); + revperm = zlist.get >("user reverse ordering"); } - ordering_problem_type MyOrderingProblem (&Zoltan2Graph, &zlist, myRawComm); + else { + // Zoltan2 reordering + typedef Tpetra::RowGraph + row_graph_type; + typedef Zoltan2::TpetraRowGraphAdapter z2_adapter_type; + RCP constActiveGraph = + Teuchos::rcp_const_cast(ActiveMatrix->getGraph()); + z2_adapter_type Zoltan2Graph (constActiveGraph); + + typedef Zoltan2::OrderingProblem ordering_problem_type; +#ifdef HAVE_MPI + // Grab the MPI Communicator and build the ordering problem with that + MPI_Comm myRawComm; + + RCP > mpicomm = + rcp_dynamic_cast > (ActiveMatrix->getComm ()); + if (mpicomm == Teuchos::null) { + myRawComm = MPI_COMM_SELF; + } else { + myRawComm = * (mpicomm->getRawMpiComm ()); + } + ordering_problem_type MyOrderingProblem (&Zoltan2Graph, &zlist, myRawComm); #else - ordering_problem_type MyOrderingProblem (&Zoltan2Graph, &zlist); + ordering_problem_type MyOrderingProblem (&Zoltan2Graph, &zlist); #endif - MyOrderingProblem.solve (); + MyOrderingProblem.solve (); - { - typedef Zoltan2::LocalOrderingSolution - ordering_solution_type; + { + typedef Zoltan2::LocalOrderingSolution + ordering_solution_type; - ordering_solution_type sol (*MyOrderingProblem.getLocalOrderingSolution()); + ordering_solution_type sol (*MyOrderingProblem.getLocalOrderingSolution()); - // perm[i] gives the where OLD index i shows up in the NEW - // ordering. revperm[i] gives the where NEW index i shows - // up in the OLD ordering. Note that perm is actually the - // "inverse permutation," in Zoltan2 terms. - perm = sol.getPermutationRCPConst (true); - revperm = sol.getPermutationRCPConst (); + // perm[i] gives the where OLD index i shows up in the NEW + // ordering. revperm[i] gives the where NEW index i shows + // up in the OLD ordering. Note that perm is actually the + // "inverse permutation," in Zoltan2 terms. + perm = sol.getPermutationRCPConst (true); + revperm = sol.getPermutationRCPConst (); + } } - } - // All reorderings here... - ReorderedLocalizedMatrix_ = rcp (new reorder_filter_type (ActiveMatrix, perm, revperm)); + // All reorderings here... + ReorderedLocalizedMatrix_ = rcp (new reorder_filter_type (ActiveMatrix, perm, revperm)); - ActiveMatrix = ReorderedLocalizedMatrix_; + ActiveMatrix = ReorderedLocalizedMatrix_; #else - // This is a logic_error, not a runtime_error, because - // setParameters() should have excluded this case already. - TEUCHOS_TEST_FOR_EXCEPTION( - true, std::logic_error, "Ifpack2::AdditiveSchwarz::setup: " - "The Zoltan2 and Xpetra packages must be enabled in order " - "to support reordering."); + // This is a logic_error, not a runtime_error, because + // setParameters() should have excluded this case already. + TEUCHOS_TEST_FOR_EXCEPTION( + true, std::logic_error, "Ifpack2::AdditiveSchwarz::setup: " + "The Zoltan2 and Xpetra packages must be enabled in order " + "to support reordering."); #endif + } + innerMatrix_ = ActiveMatrix; + //The matrix for the inner preconditioner is just the reordered/localized matrix using filters + innerPrecMatrix = innerMatrix_; } - innerMatrix_ = ActiveMatrix; - TEUCHOS_TEST_FOR_EXCEPTION( - innerMatrix_.is_null (), std::logic_error, "Ifpack2::AdditiveSchwarz::" + innerMatrix_.is_null () || innerPrecMatrix.is_null(), std::logic_error, "Ifpack2::AdditiveSchwarz::" "setup: Inner matrix is null right before constructing inner solver. " "Please report this bug to the Ifpack2 developers."); @@ -1460,7 +1584,7 @@ void AdditiveSchwarz::setup () innerPrec.is_null (), std::logic_error, "Ifpack2::AdditiveSchwarz::setup: Failed to create inner preconditioner " "with name \"" << innerName << "\"."); - innerPrec->setMatrix (innerMatrix_); + innerPrec->setMatrix (innerPrecMatrix); // Extract and apply the sublist of parameters to give to the // inner solver, if there is such a sublist of parameters. @@ -1476,7 +1600,7 @@ void AdditiveSchwarz::setup () // The new inner matrix is different from the inner // preconditioner's current matrix, so give the inner // preconditioner the new inner matrix. - Inverse_->setMatrix (innerMatrix_); + Inverse_->setMatrix (innerPrecMatrix); } TEUCHOS_TEST_FOR_EXCEPTION( Inverse_.is_null (), std::logic_error, "Ifpack2::AdditiveSchwarz::" @@ -1523,7 +1647,10 @@ setInnerPreconditioner (const Teuchos::RCPsetMatrix (innerMatrix_); + if(auto asf = Teuchos::rcp_dynamic_cast>(innerMatrix_)) + innerSolver->setMatrix (asf->getFilteredMatrix()); + else + innerSolver->setMatrix (innerMatrix_); // If the user previously specified a parameter for the inner // preconditioner's type, then clear out that parameter and its diff --git a/packages/ifpack2/src/Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp b/packages/ifpack2/src/Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp new file mode 100644 index 000000000000..8537c4d04866 --- /dev/null +++ b/packages/ifpack2/src/Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp @@ -0,0 +1,412 @@ + +/*@HEADER +// *********************************************************************** +// +// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package +// Copyright (2009) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// *********************************************************************** +//@HEADER +*/ + +#ifndef IFPACK2_ADDITIVE_SCHWARZ_FILTER_DECL_HPP +#define IFPACK2_ADDITIVE_SCHWARZ_FILTER_DECL_HPP + +/*! +\class AdditiveSchwarzFilter +\brief Wraps a Tpetra::CrsMatrix or Ifpack2::OverlappingRowMatrix in a filter that removes off-process edges, may reorder rows/columns, and may remove singletons (rows with no connections to other rows). + +This is essentially a fusion of LocalFilter, AdditiveSchwarzFilter and SingletonFilter. These three filters are used together in AdditiveSchwarz. +*/ + +#include "Ifpack2_ConfigDefs.hpp" +#include "Ifpack2_Details_RowMatrix.hpp" +#include "Ifpack2_OverlappingRowMatrix.hpp" + +namespace Ifpack2 +{ +namespace Details +{ + template + class AdditiveSchwarzFilter : + public Ifpack2::Details::RowMatrix { +public: + typedef typename MatrixType::scalar_type scalar_type; + typedef typename Kokkos::ArithTraits::val_type impl_scalar_type; + typedef typename MatrixType::local_ordinal_type local_ordinal_type; + typedef typename MatrixType::global_ordinal_type global_ordinal_type; + typedef typename MatrixType::node_type node_type; + typedef typename MatrixType::global_inds_host_view_type global_inds_host_view_type; + typedef typename MatrixType::local_inds_host_view_type local_inds_host_view_type; + typedef typename MatrixType::values_host_view_type values_host_view_type; + + typedef typename MatrixType::nonconst_global_inds_host_view_type nonconst_global_inds_host_view_type; + typedef typename MatrixType::nonconst_local_inds_host_view_type nonconst_local_inds_host_view_type; + typedef typename MatrixType::nonconst_values_host_view_type nonconst_values_host_view_type; + + typedef typename Teuchos::ScalarTraits::magnitudeType magnitude_type; + typedef Tpetra::RowMatrix row_matrix_type; + typedef Tpetra::CrsMatrix crs_matrix_type; + typedef Tpetra::MultiVector mv_type; + typedef typename crs_matrix_type::device_type device_type; + typedef typename crs_matrix_type::execution_space execution_space; + typedef Kokkos::RangePolicy policy_type; + typedef Kokkos::MDRangePolicy> policy_2d_type; + typedef Ifpack2::OverlappingRowMatrix overlapping_matrix_type; + typedef typename crs_matrix_type::local_matrix_device_type local_matrix_type; + typedef typename local_matrix_type::size_type size_type; + typedef typename local_matrix_type::row_map_type::non_const_type row_map_type; + typedef typename local_matrix_type::index_type entries_type; + typedef typename local_matrix_type::values_type values_type; + typedef typename row_map_type::HostMirror host_row_map_type; + typedef typename entries_type::HostMirror host_entries_type; + typedef typename values_type::HostMirror host_values_type; + typedef typename local_matrix_type::HostMirror host_local_matrix_type; + + static_assert(std::is_same::value, "Ifpack2::AdditiveSchwarzFilter: The template parameter MatrixType must be a Tpetra::RowMatrix specialization. Please don't use Tpetra::CrsMatrix (a subclass of Tpetra::RowMatrix) here anymore. The constructor can take either a RowMatrix or a CrsMatrix just fine."); + + typedef Tpetra::Map map_type; + + typedef typename row_matrix_type::mag_type mag_type; + + //! \name Constructor & destructor methods + //@{ + + /// \brief Constructor. + /// + /// \param A [in] The matrix to which to apply the filter. + // Must be a Tpetra::CrsMatrix or an Ifpack2::OverlappingRowMatrix that wraps a Tpetra::CrsMatrix. + /// \param perm [in] Forward permutation of A's rows and columns. + /// \param reverseperm [in] Reverse permutation of A's rows and columns. + /// \param filterSingletons [in] If true, remove rows that have no local neighbors. + /// + /// It must make sense to apply the given permutation to both the + /// rows and columns. This means that the row and column Maps must + /// have the same numbers of entries on all processes, and must have + /// the same order of GIDs on all processes. + /// + /// perm[i] gives the where OLD index i shows up in the NEW + /// ordering. revperm[i] gives the where NEW index i shows up in + /// the OLD ordering. Note that perm is actually the "inverse + /// permutation," in Zoltan2 terms. + AdditiveSchwarzFilter(const Teuchos::RCP& A, + const Teuchos::ArrayRCP& perm, + const Teuchos::ArrayRCP& reverseperm, + bool filterSingletons); + + //! Update the filtered matrix in response to A's values changing (A's structure isn't allowed to change, though) + void updateMatrixValues(); + + Teuchos::RCP getFilteredMatrix() const; + + //! Destructor. + virtual ~AdditiveSchwarzFilter (); + + //@} + //! \name Matrix query methods + //@{ + + //! The matrix's communicator. + virtual Teuchos::RCP > getComm() const; + + //! Returns the Map that describes the row distribution in this matrix. + virtual Teuchos::RCP getRowMap() const; + + //! Returns the Map that describes the column distribution in this matrix. + virtual Teuchos::RCP getColMap() const; + + //! Returns the Map that describes the domain distribution in this matrix. + virtual Teuchos::RCP getDomainMap() const; + + //! Returns the Map that describes the range distribution in this matrix. + virtual Teuchos::RCP getRangeMap() const; + + //! Returns the RowGraph associated with this matrix. + virtual Teuchos::RCP > getGraph() const; + + //! Returns the number of global rows in this matrix. + virtual global_size_t getGlobalNumRows() const; + + //! \brief Returns the number of global columns in this matrix. + virtual global_size_t getGlobalNumCols() const; + + //! Returns the number of rows owned on the calling node. + virtual size_t getLocalNumRows() const; + + //! Returns the number of columns needed to apply the forward operator on this node, i.e., the number of elements listed in the column map. + virtual size_t getLocalNumCols() const; + + //! Returns the index base for global indices for this matrix. + virtual global_ordinal_type getIndexBase() const; + + //! Returns the global number of entries in this matrix. + virtual global_size_t getGlobalNumEntries() const; + + //! Returns the local number of entries in this matrix. + virtual size_t getLocalNumEntries() const; + + /// \brief The current number of entries in this matrix, stored on + /// the calling process, in the row whose global index is \c globalRow. + /// + /// \return The number of entries, or + /// Teuchos::OrdinalTraits::invalid() if the specified row + /// is not owned by the calling process. + virtual size_t getNumEntriesInGlobalRow (global_ordinal_type globalRow) const; + + /// \brief The current number of entries in this matrix, stored on + /// the calling process, in the row whose local index is \c globalRow. + /// + /// \return The number of entries, or + /// Teuchos::OrdinalTraits::invalid() if the specified row + /// is not owned by the calling process. + virtual size_t getNumEntriesInLocalRow (local_ordinal_type localRow) const; + + //! \brief Returns the maximum number of entries across all rows/columns on all nodes. + virtual size_t getGlobalMaxNumRowEntries() const; + + //! \brief Returns the maximum number of entries across all rows/columns on this node. + virtual size_t getLocalMaxNumRowEntries() const; + + //! \brief Indicates whether this matrix has a well-defined column map. + virtual bool hasColMap() const; + + //! \brief If matrix indices are in the local range, this function returns true. Otherwise, this function returns false. */ + virtual bool isLocallyIndexed() const; + + //! \brief If matrix indices are in the global range, this function returns true. Otherwise, this function returns false. */ + virtual bool isGloballyIndexed() const; + + //! Returns \c true if fillComplete() has been called. + virtual bool isFillComplete() const; + + //! Returns \c true if RowViews are supported. + virtual bool supportsRowViews() const; + + //@} + + //! @name Extraction Methods + //@{ + + //! Extract a list of entries in a specified global row of this matrix. Put into pre-allocated storage. + /*! + \param GlobalRow - (In) Global row number for which indices are desired. + \param Indices - (Out) Global column indices corresponding to values. + \param Values - (Out) Matrix values. + \param NumEntries - (Out) Number of indices. + + Note: A std::runtime_error exception is thrown if either \c Indices or \c Values is not large enough to hold the data associated + with row \c GlobalRow. If \c GlobalRow does not belong to this node, then \c Indices and \c Values are unchanged and \c NumIndices is + returned as Teuchos::OrdinalTraits::invalid(). + */ + virtual void + getGlobalRowCopy (global_ordinal_type GlobalRow, + nonconst_global_inds_host_view_type &Indices, + nonconst_values_host_view_type &Values, + size_t& NumEntries) const; + //! Extract a list of entries in a specified local row of the graph. Put into storage allocated by calling routine. + /*! + \param LocalRow - (In) Local row number for which indices are desired. + \param Indices - (Out) Local column indices corresponding to values. + \param Values - (Out) Matrix values. + \param NumIndices - (Out) Number of indices. + + Note: A std::runtime_error exception is thrown if either \c Indices or \c Values is not large enough to hold the data associated + with row \c LocalRow. If \c LocalRow is not valid for this node, then \c Indices and \c Values are unchanged and \c NumIndices is + returned as Teuchos::OrdinalTraits::invalid(). + */ + virtual void + getLocalRowCopy (local_ordinal_type LocalRow, + nonconst_local_inds_host_view_type &Indices, + nonconst_values_host_view_type &Values, + size_t& NumEntries) const; + + //! Extract a const, non-persisting view of global indices in a specified row of the matrix. + /*! + \param GlobalRow - (In) Global row number for which indices are desired. + \param Indices - (Out) Global column indices corresponding to values. + \param Values - (Out) Row values + \post indices.size() == getNumEntriesInGlobalRow(GlobalRow) + \pre isLocallyIndexed() == false + Note: If \c GlobalRow does not belong to this node, then \c indices is set to null. + */ + virtual void + getGlobalRowView (global_ordinal_type GlobalRow, + global_inds_host_view_type &indices, + values_host_view_type &values) const; + //! Extract a const, non-persisting view of local indices in a specified row of the matrix. + /*! + \param LocalRow - (In) Local row number for which indices are desired. + \param Indices - (Out) Global column indices corresponding to values. + \param Values - (Out) Row values + \pre isGloballyIndexed() == false + \post indices.size() == getNumEntriesInDropRow(LocalRow) + + Note: If \c LocalRow does not belong to this node, then \c indices is set to null. + */ + virtual void + getLocalRowView (local_ordinal_type LocalRow, + local_inds_host_view_type & indices, + values_host_view_type & values) const; + //! \brief Get a copy of the diagonal entries owned by this node, with local row indices. + /*! Returns a distributed Vector object partitioned according to this matrix's row map, containing the + the zero and non-zero diagonals owned by this node. */ + virtual void getLocalDiagCopy(Tpetra::Vector &diag) const; + + //@} + + //! \name Mathematical Methods + //@{ + + /** + * \brief Scales the RowMatrix on the left with the Vector x. + * + * This matrix will be scaled such that A(i,j) = x(i)*A(i,j) + * where i denoes the global row number of A and + * j denotes the global column number of A. + * + * \param x A vector to left scale this matrix. + */ + virtual void leftScale(const Tpetra::Vector& x); + + /** + * \brief Scales the RowMatrix on the right with the Vector x. + * + * This matrix will be scaled such that A(i,j) = x(j)*A(i,j) + * where i denoes the global row number of A and + * j denotes the global column number of A. + * + * \param x A vector to right scale this matrix. + */ + virtual void rightScale(const Tpetra::Vector& x); + + //! Returns the Frobenius norm of the matrix. + /** Computes and returns the Frobenius norm of the matrix, defined as: + \f$ \|A\|_F = \sqrt{\sum_{i,j} \|\a_{ij}\|^2} \f$ + */ + virtual mag_type getFrobeniusNorm() const; + + /// \brief \f$ Y := \beta Y + \alpha Op(A) X \f$, + /// where Op(A) is either A, \f$A^T\f$, or \f$A^H\f$. + /// + /// Apply the reordered version of the matrix (or its transpose or + /// conjugate transpose) to the given multivector X, producing Y. + /// - if beta == 0, apply() must overwrite \c Y, + /// so that any values in \c Y (including NaNs) are ignored. + /// - if alpha == 0, apply() may short-circuit the + /// matrix, so that any values in \c X (including NaNs) are + /// ignored. + /// + /// This method assumes that X and Y are in the reordered order. + /// + /// If hasTransposeApply() returns false, then the only valid value + /// of \c mode is Teuchos::NO_TRANS (the default). Otherwise, it + /// accepts the following values: + /// - mode = Teuchos::NO_TRANS: Op(A) is the reordered version of A. + /// - mode = Teuchos::TRANS: Op(A) is the reordered version of the + /// transpose of A. + /// - mode = Teuchos::CONJ_TRANS: Op(A) is reordered version of + /// the conjugate transpose of A. + virtual void + apply (const Tpetra::MultiVector &X, + Tpetra::MultiVector &Y, + Teuchos::ETransp mode = Teuchos::NO_TRANS, + scalar_type alpha = Teuchos::ScalarTraits::one(), + scalar_type beta = Teuchos::ScalarTraits::zero()) const; + + //! Whether apply() can apply the transpose or conjugate transpose. + virtual bool hasTransposeApply() const; + + //! Solve singleton rows of OverlappingY, then filter and permute OverlappingB to get the reduced B. + void CreateReducedProblem( + const Tpetra::MultiVector& OverlappingB, + Tpetra::MultiVector& OverlappingY, + Tpetra::MultiVector& ReducedReorderedB) const; + + //! Scatter ReducedY back to non-singleton rows of OverlappingY, according to the reordering. + void UpdateLHS( + const Tpetra::MultiVector& ReducedReorderedY, + Tpetra::MultiVector& OverlappingY) const; + + void setup(const Teuchos::RCP& A_unfiltered, + const Teuchos::ArrayRCP& perm, + const Teuchos::ArrayRCP& reverseperm, + bool filterSingletons); + + /// \brief Fill the entries/values in the local matrix, and from that construct A_. + /// + /// Requires that the structure of A_unfiltered_ has not changed since this was constructed. + /// Used by both the constructor and updateMatrixValues(). + void fillLocalMatrix(local_matrix_type localMatrix); + + //@} + +private: + //! Pointer to the matrix to be preconditioned. + Teuchos::RCP A_unfiltered_; + //! Filtered and reordered matrix. + Teuchos::RCP A_; + //! Permutation: Original to reordered. Size is number of local rows in A_unfiltered_. Rows which were filtered from A_ map to -1 (Invalid). + Kokkos::DualView perm_; + //! Permutation: Reordered to original. Size is number of local rows in A_. + Kokkos::DualView reverseperm_; + local_ordinal_type numSingletons_; + //! List of singleton rows (indices are rows in A_unfiltered_) + Kokkos::DualView singletons_; + //! Diagonal values of singletons (used for solving those rows) + Kokkos::DualView singletonDiagonals_; + + //! Whether singletons (disconnected rows) are filtered out of the local matrix + // (apply and solve still process those rows separately) + bool FilterSingletons_; + + //! Row, col, domain and range map of this locally filtered matrix (it's square and non-distributed) + Teuchos::RCP localMap_; +}; + +}} //namespace Ifpack2::Details + +#endif + diff --git a/packages/ifpack2/src/Ifpack2_Details_AdditiveSchwarzFilter_def.hpp b/packages/ifpack2/src/Ifpack2_Details_AdditiveSchwarzFilter_def.hpp new file mode 100644 index 000000000000..0793527b4cc1 --- /dev/null +++ b/packages/ifpack2/src/Ifpack2_Details_AdditiveSchwarzFilter_def.hpp @@ -0,0 +1,729 @@ +/*@HEADER +// *********************************************************************** +// +// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package +// Copyright (2009) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// *********************************************************************** +//@HEADER +*/ + +#ifndef IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP +#define IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP + +#include "Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp" +#include "KokkosKernels_Sorting.hpp" +#include "Kokkos_Bitset.hpp" + +namespace Ifpack2 +{ +namespace Details +{ + +template +AdditiveSchwarzFilter:: +AdditiveSchwarzFilter(const Teuchos::RCP& A_unfiltered, + const Teuchos::ArrayRCP& perm, + const Teuchos::ArrayRCP& reverseperm, + bool filterSingletons) +{ + setup(A_unfiltered, perm, reverseperm, filterSingletons); +} + +template +void AdditiveSchwarzFilter:: +setup(const Teuchos::RCP& A_unfiltered, + const Teuchos::ArrayRCP& perm, + const Teuchos::ArrayRCP& reverseperm, + bool filterSingletons) +{ + using Teuchos::RCP; + using Teuchos::rcp; + using Teuchos::rcp_dynamic_cast; + //Check that A is an instance of allowed types, either: + // - Tpetra::CrsMatrix + // - Ifpack2::OverlappingRowMatrix (which always consists of two CrsMatrices, A_ and ExtMatrix_) + TEUCHOS_TEST_FOR_EXCEPTION( + !rcp_dynamic_cast(A_unfiltered) && + !rcp_dynamic_cast(A_unfiltered), + std::invalid_argument, + "Ifpack2::Details::AdditiveSchwarzFilter: The input matrix must be a Tpetra::CrsMatrix or an Ifpack2::OverlappingRowMatrix"); + A_unfiltered_ = A_unfiltered; + local_matrix_type A_local, A_halo; + bool haveHalo = !rcp_dynamic_cast(A_unfiltered_).is_null(); + if(haveHalo) + { + auto A_overlapping = rcp_dynamic_cast(A_unfiltered_); + A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice(); + A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice(); + } + else + { + auto A_crs = rcp_dynamic_cast(A_unfiltered_); + A_local = A_crs->getLocalMatrixDevice(); + //leave A_halo empty in this case + } + //Check that perm and reversePerm are the same length and match the number of local rows in A + TEUCHOS_TEST_FOR_EXCEPTION( + perm.size() != reverseperm.size(), + std::invalid_argument, + "Ifpack2::Details::AdditiveSchwarzFilter: perm and reverseperm should be the same length"); + TEUCHOS_TEST_FOR_EXCEPTION( + (size_t) perm.size() != (size_t) A_unfiltered_->getLocalNumRows(), + std::invalid_argument, + "Ifpack2::Details::AdditiveSchwarzFilter: length of perm and reverseperm should be the same as the number of local rows in unfiltered A"); + //First, compute the permutation tables (that exclude singletons, if requested) + FilterSingletons_ = filterSingletons; + local_ordinal_type numLocalRows; + local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows(); + if(FilterSingletons_) + { + //Fill singletons and singletonDiagonals (allocate them to the upper bound at first, then shrink it to size) + singletons_ = Kokkos::DualView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "singletons_"), totalLocalRows); + singletonDiagonals_ = Kokkos::DualView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "singletonDiagonals_"), totalLocalRows); + auto singletonsDevice = singletons_.view_device(); + singletons_.modify_device(); + auto singletonDiagonalsDevice = singletonDiagonals_.view_device(); + singletonDiagonals_.modify_device(); + local_ordinal_type numSingletons; + Kokkos::Bitset isSingletonBitset(totalLocalRows); + Kokkos::parallel_scan(policy_type(0, totalLocalRows), + KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type& lnumSingletons, bool finalPass) + { + bool isSingleton = true; + impl_scalar_type singletonValue = Kokkos::ArithTraits::zero(); + if(i < A_local.numRows()) + { + //row i is in original local matrix + size_type rowBegin = A_local.graph.row_map(i); + size_type rowEnd = A_local.graph.row_map(i + 1); + for(size_type j = rowBegin; j < rowEnd; j++) + { + local_ordinal_type entry = A_local.graph.entries(j); + if(entry < totalLocalRows && entry != i) + { + isSingleton = false; + break; + } + if(finalPass && entry == i) + { + //note: using a sum to compute the diagonal value, in case entries are not compressed. + singletonValue += A_local.values(j); + } + } + } + else + { + //row i is in halo + local_ordinal_type row = i - A_local.numRows(); + size_type rowBegin = A_halo.graph.row_map(row); + size_type rowEnd = A_halo.graph.row_map(row + 1); + for(size_type j = rowBegin; j < rowEnd; j++) + { + local_ordinal_type entry = A_halo.graph.entries(j); + if(entry < totalLocalRows && entry != i) + { + isSingleton = false; + break; + } + if(finalPass && entry == i) + { + singletonValue += A_halo.values(j); + } + } + } + if(isSingleton) + { + if(finalPass) + { + isSingletonBitset.set(i); + singletonsDevice(lnumSingletons) = i; + singletonDiagonalsDevice(lnumSingletons) = singletonValue; + } + lnumSingletons++; + } + }, numSingletons); + numSingletons_ = numSingletons; + //Each local row in A_unfiltered is either a singleton or a row in the filtered matrix. + numLocalRows = totalLocalRows - numSingletons_; + //Using the list of singletons, create the reduced permutations. + perm_ = Kokkos::DualView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), totalLocalRows); + perm_.modify_device(); + reverseperm_ = Kokkos::DualView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), numLocalRows); + reverseperm_.modify_device(); + auto permView = perm_.view_device(); + auto reversepermView = reverseperm_.view_device(); + //Get the original inverse permutation on device + Kokkos::View origpermView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "input reverse perm"), totalLocalRows); + Kokkos::View origpermHost(reverseperm.get(), totalLocalRows); + Kokkos::deep_copy(execution_space(), origpermView, origpermHost); + //reverseperm is a list of local rows in A_unfiltered, so filter out the elements of reverseperm where isSingleton is true. Then regenerate the forward permutation. + Kokkos::parallel_scan(policy_type(0, totalLocalRows), + KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type& lindex, bool finalPass) + { + local_ordinal_type origRow = origpermView(i); + if(!isSingletonBitset.test(origRow)) + { + if(finalPass) + { + //mark the mapping in both directions between origRow and lindex (a row in filtered A) + reversepermView(lindex) = origRow; + permView(origRow) = lindex; + } + lindex++; + } + else + { + //origRow is a singleton, so mark a -1 in the new forward permutation to show that it has no corresponding row in filtered A. + if(finalPass) + permView(origRow) = local_ordinal_type(-1); + } + }); + perm_.sync_host(); + reverseperm_.sync_host(); + } + else + { + //We will keep all the local rows, so use the permutation as is + perm_ = Kokkos::DualView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), perm.size()); + perm_.modify_host(); + auto permHost = perm_.view_host(); + for(size_t i = 0; i < (size_t) perm.size(); i++) + permHost(i) = perm[i]; + perm_.sync_device(); + reverseperm_ = Kokkos::DualView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "reverseperm_"), reverseperm.size()); + reverseperm_.modify_host(); + auto reversepermHost = reverseperm_.view_host(); + for(size_t i = 0; i < (size_t) reverseperm.size(); i++) + reversepermHost(i) = reverseperm[i]; + reverseperm_.sync_device(); + numSingletons_ = 0; + numLocalRows = totalLocalRows; + } + auto permView = perm_.view_device(); + auto reversepermView = reverseperm_.view_device(); + //Fill the local matrix of A_ (filtered and permuted) + //First, construct the rowmap by counting the entries in each row + row_map_type localRowptrs(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered rowptrs"), numLocalRows + 1); + Kokkos::parallel_for(policy_type(0, numLocalRows), + KOKKOS_LAMBDA(local_ordinal_type i) + { + //Count the entries that will be in filtered row i. + //This means entries which correspond to local, non-singleton rows/columns. + local_ordinal_type numEntries = 0; + local_ordinal_type origRow = reversepermView(i); + if(origRow < A_local.numRows()) + { + //row i is in original local matrix + size_type rowBegin = A_local.graph.row_map(origRow); + size_type rowEnd = A_local.graph.row_map(origRow + 1); + for(size_type j = rowBegin; j < rowEnd; j++) + { + local_ordinal_type entry = A_local.graph.entries(j); + if(entry < totalLocalRows && permView(entry) != -1) + numEntries++; + } + } + else + { + //row i is in halo + local_ordinal_type row = origRow - A_local.numRows(); + size_type rowBegin = A_halo.graph.row_map(row); + size_type rowEnd = A_halo.graph.row_map(row + 1); + for(size_type j = rowBegin; j < rowEnd; j++) + { + local_ordinal_type entry = A_halo.graph.entries(j); + if(entry < totalLocalRows && permView(entry) != -1) + numEntries++; + } + } + localRowptrs(i) = numEntries; + if(i == numLocalRows - 1) + localRowptrs(i + 1) = 0; + }); + //Prefix sum to finish computing the rowptrs + size_type numLocalEntries; + Kokkos::parallel_scan(policy_type(0, numLocalRows + 1), + KOKKOS_LAMBDA(local_ordinal_type i, size_type& lnumEntries, bool finalPass) + { + size_type numEnt = localRowptrs(i); + if(finalPass) + localRowptrs(i) = lnumEntries; + lnumEntries += numEnt; + }, numLocalEntries); + //Allocate and fill local entries and values + entries_type localEntries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered entries"), numLocalEntries); + values_type localValues(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered values"), numLocalEntries); + //Create the local matrix with the uninitialized entries/values, then fill it. + local_matrix_type localMatrix("AdditiveSchwarzFilter", numLocalRows, numLocalRows, numLocalEntries, localValues, localRowptrs, localEntries); + fillLocalMatrix(localMatrix); + //Create a serial comm and the map for the final filtered CrsMatrix (each process uses its own local map) +#ifdef HAVE_IFPACK2_DEBUG + TEUCHOS_TEST_FOR_EXCEPTION( + ! mapPairsAreFitted (A_unfiltered_), std::invalid_argument, "Ifpack2::LocalFilter: " + "A's Map pairs are not fitted to each other on Process " + << A_->getRowMap ()->getComm ()->getRank () << " of the input matrix's " + "communicator. " + "This means that LocalFilter does not currently know how to work with A. " + "This will change soon. Please see discussion of Bug 5992."); +#endif // HAVE_IFPACK2_DEBUG + + // Build the local communicator (containing this process only). + RCP > localComm; +#ifdef HAVE_MPI + localComm = rcp (new Teuchos::MpiComm (MPI_COMM_SELF)); +#else + localComm = rcp (new Teuchos::SerialComm ()); +#endif // HAVE_MPI + //All 4 maps are the same for a local square operator. + localMap_ = rcp (new map_type (numLocalRows, 0, localComm, Tpetra::GloballyDistributed)); + //Create the inner filtered matrix. + auto crsParams = rcp(new Teuchos::ParameterList); + crsParams->set("sorted", true); + //NOTE: this is the fastest way to construct A_ - it's created as fillComplete, + //and no communication needs to be done since localMap_ uses a local comm. + //It does need to copy the whole local matrix to host when DualViews are constructed + //(only an issue with non-UVM GPU backends) but this is just unavoidable when creating a Tpetra::CrsMatrix. + //It also needs to compute local constants (maxNumRowEntries) but this should be a + //cheap operation relative to what this constructor already did. + A_ = rcp(new crs_matrix_type(localMap_, localMap_, localMatrix, crsParams)); +} + +template +AdditiveSchwarzFilter::~AdditiveSchwarzFilter () {} + +template +void AdditiveSchwarzFilter::updateMatrixValues() +{ + //Get the local matrix back from A_ + auto localMatrix = A_->getLocalMatrixDevice(); + //Copy new values from A_unfiltered to the local matrix, and then reconstruct A_. + fillLocalMatrix(localMatrix); + A_->setAllValues(localMatrix.graph.row_map, localMatrix.graph.entries, localMatrix.values); +} + +template +Teuchos::RCP::crs_matrix_type> +AdditiveSchwarzFilter::getFilteredMatrix() const +{ + return A_; +} + +template +void AdditiveSchwarzFilter::fillLocalMatrix(typename AdditiveSchwarzFilter::local_matrix_type localMatrix) +{ + auto localRowptrs = localMatrix.graph.row_map; + auto localEntries = localMatrix.graph.entries; + auto localValues = localMatrix.values; + auto permView = perm_.view_device(); + auto reversepermView = reverseperm_.view_device(); + local_matrix_type A_local, A_halo; + bool haveHalo = !Teuchos::rcp_dynamic_cast(A_unfiltered_).is_null(); + if(haveHalo) + { + auto A_overlapping = Teuchos::rcp_dynamic_cast(A_unfiltered_); + A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice(); + A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice(); + } + else + { + auto A_crs = Teuchos::rcp_dynamic_cast(A_unfiltered_); + A_local = A_crs->getLocalMatrixDevice(); + //leave A_halo empty in this case + } + local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows(); + //Fill entries and values + Kokkos::parallel_for(policy_type(0, localMatrix.numRows()), + KOKKOS_LAMBDA(local_ordinal_type i) + { + //Count the entries that will be in filtered row i. + //This means entries which correspond to local, non-singleton rows/columns. + size_type outRowStart = localRowptrs(i); + local_ordinal_type insertPos = 0; + local_ordinal_type origRow = reversepermView(i); + if(origRow < A_local.numRows()) + { + //row i is in original local matrix + size_type rowBegin = A_local.graph.row_map(origRow); + size_type rowEnd = A_local.graph.row_map(origRow + 1); + for(size_type j = rowBegin; j < rowEnd; j++) + { + local_ordinal_type entry = A_local.graph.entries(j); + if(entry < totalLocalRows) + { + auto newCol = permView(entry); + if(newCol != -1) + { + localEntries(outRowStart + insertPos) = newCol; + localValues(outRowStart + insertPos) = A_local.values(j); + insertPos++; + } + } + } + } + else + { + //row i is in halo + local_ordinal_type row = origRow - A_local.numRows(); + size_type rowBegin = A_halo.graph.row_map(row); + size_type rowEnd = A_halo.graph.row_map(row + 1); + for(size_type j = rowBegin; j < rowEnd; j++) + { + local_ordinal_type entry = A_halo.graph.entries(j); + if(entry < totalLocalRows) + { + auto newCol = permView(entry); + if(newCol != -1) + { + localEntries(outRowStart + insertPos) = newCol; + localValues(outRowStart + insertPos) = A_halo.values(j); + insertPos++; + } + } + } + } + }); + //Sort the matrix, since the reordering can shuffle the entries into any order. + KokkosKernels::sort_crs_matrix(localMatrix); +} + +template +Teuchos::RCP > AdditiveSchwarzFilter::getComm() const +{ + return localMap_->getComm(); +} + +template +Teuchos::RCP::map_type> +AdditiveSchwarzFilter::getRowMap() const +{ + return localMap_; +} + +template +Teuchos::RCP::map_type> +AdditiveSchwarzFilter::getColMap() const +{ + return localMap_; +} + +template +Teuchos::RCP::map_type> +AdditiveSchwarzFilter::getDomainMap() const +{ + return localMap_; +} + +template +Teuchos::RCP::map_type> +AdditiveSchwarzFilter::getRangeMap() const +{ + return localMap_; +} + +template +Teuchos::RCP > +AdditiveSchwarzFilter::getGraph() const +{ + throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter: does not support getGraph."); +} + +template +global_size_t AdditiveSchwarzFilter::getGlobalNumRows() const +{ + return A_->getGlobalNumRows(); +} + +template +global_size_t AdditiveSchwarzFilter::getGlobalNumCols() const +{ + return A_->getGlobalNumCols(); +} + +template +size_t AdditiveSchwarzFilter::getLocalNumRows() const +{ + return A_->getLocalNumRows(); +} + +template +size_t AdditiveSchwarzFilter::getLocalNumCols() const +{ + return A_->getLocalNumCols(); +} + +template +typename MatrixType::global_ordinal_type AdditiveSchwarzFilter::getIndexBase() const +{ + return A_->getIndexBase(); +} + +template +global_size_t AdditiveSchwarzFilter::getGlobalNumEntries() const +{ + return getLocalNumEntries(); +} + +template +size_t AdditiveSchwarzFilter::getLocalNumEntries() const +{ + return A_->getLocalNumEntries(); +} + +template +size_t AdditiveSchwarzFilter:: +getNumEntriesInGlobalRow (global_ordinal_type globalRow) const +{ + return A_->getNumEntriesInGlobalRow(globalRow); +} + +template +size_t AdditiveSchwarzFilter:: +getNumEntriesInLocalRow (local_ordinal_type localRow) const +{ + return A_->getNumEntriesInLocalRow(localRow); +} + +template +size_t AdditiveSchwarzFilter::getGlobalMaxNumRowEntries() const +{ + //Use A_unfiltered_ to get a valid upper bound for this. + //This lets us support this function without computing global constants on A_. + return A_unfiltered_->getGlobalMaxNumRowEntries(); +} + +template +size_t AdditiveSchwarzFilter::getLocalMaxNumRowEntries() const +{ + //Use A_unfiltered_ to get a valid upper bound for this + return A_unfiltered_->getLocalMaxNumRowEntries(); +} + + +template +bool AdditiveSchwarzFilter::hasColMap() const +{ + return true; +} + + +template +bool AdditiveSchwarzFilter::isLocallyIndexed() const +{ + return true; +} + + +template +bool AdditiveSchwarzFilter::isGloballyIndexed() const +{ + return false; +} + + +template +bool AdditiveSchwarzFilter::isFillComplete() const +{ + return true; +} + + +template +void AdditiveSchwarzFilter:: + getGlobalRowCopy (global_ordinal_type globalRow, + nonconst_global_inds_host_view_type &globalInd, + nonconst_values_host_view_type &val, + size_t& numEntries) const +{ + throw std::runtime_error("Ifpack2::Details::AdditiveSchwarzFilter does not implement getGlobalRowCopy."); +} + +template +void AdditiveSchwarzFilter:: +getLocalRowCopy (local_ordinal_type LocalRow, + nonconst_local_inds_host_view_type &Indices, + nonconst_values_host_view_type &Values, + size_t& NumEntries) const + +{ + A_->getLocalRowCopy(LocalRow, Indices, Values, NumEntries); +} + +template +void AdditiveSchwarzFilter::getGlobalRowView(global_ordinal_type /* GlobalRow */, + global_inds_host_view_type &/*indices*/, + values_host_view_type &/*values*/) const +{ + throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter: does not support getGlobalRowView."); +} + +template +void AdditiveSchwarzFilter::getLocalRowView(local_ordinal_type LocalRow, + local_inds_host_view_type & indices, + values_host_view_type & values) const +{ + A_->getLocalRowView(LocalRow, indices, values); +} + +template +void AdditiveSchwarzFilter:: +getLocalDiagCopy (Tpetra::Vector &diag) const +{ + // This is somewhat dubious as to how the maps match. + A_->getLocalDiagCopy(diag); +} + +template +void AdditiveSchwarzFilter::leftScale(const Tpetra::Vector& /* x */) +{ + throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter does not support leftScale."); +} + +template +void AdditiveSchwarzFilter::rightScale(const Tpetra::Vector& /* x */) +{ + throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter does not support rightScale."); +} + +template +void AdditiveSchwarzFilter:: +apply (const Tpetra::MultiVector &X, + Tpetra::MultiVector &Y, + Teuchos::ETransp mode, + scalar_type alpha, + scalar_type beta) const +{ + A_->apply(X, Y, mode, alpha, beta); +} + +template +void AdditiveSchwarzFilter:: +CreateReducedProblem( + const Tpetra::MultiVector& OverlappingB, + Tpetra::MultiVector& OverlappingY, + Tpetra::MultiVector& ReducedReorderedB) const +{ + //NOTE: the three vectors here are always constructed within AdditiveSchwarz and are not subviews, + //so they are all constant stride (this avoids a lot of boilerplate with whichVectors) + TEUCHOS_TEST_FOR_EXCEPTION(!OverlappingB.isConstantStride() || !OverlappingY.isConstantStride() || !ReducedReorderedB.isConstantStride(), + std::logic_error, "Ifpack2::AdditiveSchwarzFilter::CreateReducedProblem ERROR: One of the input MultiVectors is not constant stride."); + local_ordinal_type numVecs = OverlappingB.getNumVectors(); + auto b = OverlappingB.getLocalViewDevice(Tpetra::Access::ReadOnly); + auto reducedB = ReducedReorderedB.getLocalViewDevice(Tpetra::Access::OverwriteAll); + auto singletons = singletons_.view_device(); + auto singletonDiags = singletonDiagonals_.view_device(); + auto revperm = reverseperm_.view_device(); + //First, solve the singletons. + { + auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite); + Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) numSingletons_, numVecs}), + KOKKOS_LAMBDA(local_ordinal_type singletonIndex, local_ordinal_type col) + { + local_ordinal_type row = singletons(singletonIndex); + y(row, col) = b(row, col) / singletonDiags(singletonIndex); + }); + } + //Next, permute OverlappingB to ReducedReorderedB. + Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedB.extent(0), numVecs}), + KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col) + { + reducedB(row, col) = b(revperm(row), col); + }); + //Finally, if there are singletons, eliminate the matrix entries which are in singleton columns ("eliminate" here means update reducedB like in row reduction) + //This could be done more efficiently by storing a separate matrix of just the singleton columns and permuted non-singleton rows, but this adds a lot of complexity. + //Instead, just apply() the unfiltered matrix to OverlappingY (which is 0, except for singletons), and then permute the result of that and subtract it from reducedB. + if(numSingletons_) + { + mv_type singletonUpdates(A_unfiltered_->getRowMap(), numVecs, false); + A_unfiltered_->apply(OverlappingY, singletonUpdates); + auto updatesView = singletonUpdates.getLocalViewDevice(Tpetra::Access::ReadOnly); + auto revperm = reverseperm_.view_device(); + Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedB.extent(0), numVecs}), + KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col) + { + local_ordinal_type origRow = revperm(row); + reducedB(row, col) -= updatesView(origRow, col); + }); + } +} + +template +void AdditiveSchwarzFilter::UpdateLHS( + const Tpetra::MultiVector& ReducedReorderedY, + Tpetra::MultiVector& OverlappingY) const +{ + //NOTE: both vectors here are always constructed within AdditiveSchwarz and are not subviews, + //so they are all constant stride (this avoids a lot of boilerplate with whichVectors) + TEUCHOS_TEST_FOR_EXCEPTION(!ReducedReorderedY.isConstantStride() || !OverlappingY.isConstantStride(), + std::logic_error, "Ifpack2::AdditiveSchwarzFilter::UpdateLHS ERROR: One of the input MultiVectors is not constant stride."); + auto reducedY = ReducedReorderedY.getLocalViewDevice(Tpetra::Access::ReadOnly); + auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite); + auto revperm = reverseperm_.view_device(); + Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedY.extent(0), (local_ordinal_type) reducedY.extent(1)}), + KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col) + { + y(revperm(row), col) = reducedY(row, col); + }); +} + +template +bool AdditiveSchwarzFilter::hasTransposeApply() const +{ + return true; +} + + +template +bool AdditiveSchwarzFilter::supportsRowViews() const +{ + return true; +} + +template +typename AdditiveSchwarzFilter::mag_type AdditiveSchwarzFilter::getFrobeniusNorm() const +{ + // Reordering doesn't change the Frobenius norm. + return A_->getFrobeniusNorm (); +} + +}} // namespace Ifpack2::Details + +#define IFPACK2_DETAILS_ADDITIVESCHWARZFILTER_INSTANT(S,LO,GO,N) \ + template class Ifpack2::Details::AdditiveSchwarzFilter< Tpetra::RowMatrix >; + +#endif + diff --git a/packages/ifpack2/src/Ifpack2_IlukGraph.hpp b/packages/ifpack2/src/Ifpack2_IlukGraph.hpp index cff61d43f41f..9f715f4f8a6b 100644 --- a/packages/ifpack2/src/Ifpack2_IlukGraph.hpp +++ b/packages/ifpack2/src/Ifpack2_IlukGraph.hpp @@ -281,6 +281,7 @@ void IlukGraph::initialize() // Get Maximum Row length const int MaxNumIndices = OverlapGraph_->getLocalMaxNumRowEntries (); + std::cout << "IlukGraph: OverlapGraph says max num row entries is " << MaxNumIndices << '\n'; // FIXME (mfh 23 Dec 2013) Use size_t or whatever // getLocalNumElements() returns, instead of ptrdiff_t. diff --git a/packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_decl.hpp b/packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_decl.hpp index 00433568cf6b..167a464a1623 100644 --- a/packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_decl.hpp +++ b/packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_decl.hpp @@ -75,6 +75,8 @@ class OverlappingRowMatrix : using row_matrix_type = Tpetra::RowMatrix; + using crs_matrix_type = Tpetra::CrsMatrix; static_assert(std::is_same::value, "Ifpack2::OverlappingRowMatrix: The template parameter MatrixType must be a Tpetra::RowMatrix specialization. Please don't use Tpetra::CrsMatrix (a subclass of Tpetra::RowMatrix) here anymore. The constructor can take either a RowMatrix or a CrsMatrix just fine."); @@ -341,18 +343,19 @@ class OverlappingRowMatrix : void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const; - virtual Teuchos::RCP getUnderlyingMatrix() const; + Teuchos::RCP getUnderlyingMatrix() const; - Teuchos::RCP getExtMatrix() const; + Teuchos::RCP getExtMatrix() const; Teuchos::ArrayView getExtHaloStarts() const; + void doExtImport(); + private: typedef Tpetra::Map map_type; typedef Tpetra::Import import_type; typedef Tpetra::Export export_type; typedef Tpetra::RowGraph row_graph_type; - typedef Tpetra::CrsMatrix crs_matrix_type; typedef Tpetra::Vector vector_type; //! The input matrix to the constructor. @@ -369,7 +372,7 @@ class OverlappingRowMatrix : Teuchos::RCP Importer_; //! The matrix containing only the overlap rows. - Teuchos::RCP ExtMatrix_; + Teuchos::RCP ExtMatrix_; Teuchos::RCP ExtMap_; Teuchos::RCP ExtImporter_; Teuchos::Array ExtHaloStarts_; diff --git a/packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_def.hpp b/packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_def.hpp index f24c7ce5d4e3..86a1175e3e36 100644 --- a/packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_def.hpp +++ b/packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_def.hpp @@ -175,11 +175,9 @@ OverlappingRowMatrix (const Teuchos::RCP& A, ExtImporter_ = rcp (new import_type (A_->getRowMap (), ExtMap_)); { - RCP ExtMatrix_nc = - rcp (new crs_matrix_type (ExtMap_, ColMap_, 0)); - ExtMatrix_nc->doImport (*A_, *ExtImporter_, Tpetra::INSERT); - ExtMatrix_nc->fillComplete (A_->getDomainMap (), RowMap_); - ExtMatrix_ = ExtMatrix_nc; // we only need the const version after here + ExtMatrix_ = rcp (new crs_matrix_type (ExtMap_, ColMap_, 0)); + ExtMatrix_->doImport (*A_, *ExtImporter_, Tpetra::INSERT); + ExtMatrix_->fillComplete (A_->getDomainMap (), RowMap_); } // fix indices for overlapping matrix @@ -819,14 +817,14 @@ void OverlappingRowMatrix::describe(Teuchos::FancyOStream &out, } template -Teuchos::RCP > +Teuchos::RCP > OverlappingRowMatrix::getUnderlyingMatrix() const { return A_; } template -Teuchos::RCP > +Teuchos::RCP > OverlappingRowMatrix::getExtMatrix() const { return ExtMatrix_; @@ -838,6 +836,13 @@ Teuchos::ArrayView OverlappingRowMatrix::getExtHaloSta return ExtHaloStarts_(); } +template +void OverlappingRowMatrix::doExtImport() +{ + ExtMatrix_ = rcp (new crs_matrix_type (ExtMap_, ColMap_, 0)); + ExtMatrix_->doImport (*A_, *ExtImporter_, Tpetra::INSERT); + ExtMatrix_->fillComplete (A_->getDomainMap (), RowMap_); +} } // namespace Ifpack2 diff --git a/packages/ifpack2/test/unit_tests/Ifpack2_UnitTestAdditiveSchwarz.cpp b/packages/ifpack2/test/unit_tests/Ifpack2_UnitTestAdditiveSchwarz.cpp index 2d50702d3203..b80f6600229c 100644 --- a/packages/ifpack2/test/unit_tests/Ifpack2_UnitTestAdditiveSchwarz.cpp +++ b/packages/ifpack2/test/unit_tests/Ifpack2_UnitTestAdditiveSchwarz.cpp @@ -133,9 +133,11 @@ TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL(Ifpack2AdditiveSchwarz, Test0, Scalar, LocalOr params.set ("schwarz: combine mode", "Zero"); #if defined(HAVE_IFPACK2_XPETRA) && defined(HAVE_IFPACK2_ZOLTAN2) + std::cout << "Test0: Enabling reordering!\n"; params.set ("schwarz: use reordering", true); params.set ("schwarz: reordering list", zlist); #else + std::cout << "Test0: NOT enabling reordering!\n"; params.set ("schwarz: use reordering", false); #endif diff --git a/packages/ifpack2/test/unit_tests/Ifpack2_UnitTestLocalSparseTriangularSolver.cpp b/packages/ifpack2/test/unit_tests/Ifpack2_UnitTestLocalSparseTriangularSolver.cpp index f15c6d2f0d09..7a8c12d4087d 100644 --- a/packages/ifpack2/test/unit_tests/Ifpack2_UnitTestLocalSparseTriangularSolver.cpp +++ b/packages/ifpack2/test/unit_tests/Ifpack2_UnitTestLocalSparseTriangularSolver.cpp @@ -84,15 +84,8 @@ localSolve (Tpetra::MultiVector< using Teuchos::CONJ_TRANS; using Teuchos::NO_TRANS; using Teuchos::TRANS; - using MV = Tpetra::MultiVector< - typename CrsMatrixType::scalar_type, - typename CrsMatrixType::local_ordinal_type, - typename CrsMatrixType::global_ordinal_type, - typename CrsMatrixType::node_type>; using scalar_type = typename CrsMatrixType::scalar_type; using STS = Teuchos::ScalarTraits; - using device_type = typename CrsMatrixType::device_type; - using dev_memory_space = typename device_type::memory_space; const char prefix[] = "localSolve: "; TEUCHOS_TEST_FOR_EXCEPTION @@ -999,12 +992,6 @@ void testArrowMatrix (bool& success, Teuchos::FancyOStream& out) L,U,out); if(!gblSuccess) return; - typedef typename crs_matrix_type::local_graph_device_type local_graph_type; - typedef typename crs_matrix_type::local_matrix_device_type local_matrix_type; - typedef typename local_matrix_type::row_map_type::non_const_type row_offsets_type; - typedef typename local_graph_type::entries_type::non_const_type col_inds_type; - typedef typename local_matrix_type::values_type::non_const_type values_type; - typedef typename crs_matrix_type::local_inds_host_view_type const_local_inds_type; typedef typename crs_matrix_type::values_host_view_type const_values_type; diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp index 79da3ba2edae..04cec10c9bce 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp @@ -1326,7 +1326,7 @@ namespace Tpetra { /// /// \pre columnIndices are sorted within rows /// \pre hasColMap() == true - /// \pre rowPointers.size() != getLocalNumRows()+1 + /// \pre rowPointers.size() == getLocalNumRows()+1 /// \pre No insert routines have been called. /// /// \warning This method is intended for expert developer use @@ -1339,7 +1339,7 @@ namespace Tpetra { /// /// \pre columnIndices are sorted within rows /// \pre hasColMap() == true - /// \pre rowPointers.size() != getLocalNumRows()+1 + /// \pre rowPointers.size() == getLocalNumRows()+1 /// \pre No insert routines have been called. /// /// \warning This method is intended for expert developer use diff --git a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp index e108e6134fcf..b7a7792e7a68 100644 --- a/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp @@ -2720,6 +2720,7 @@ namespace Tpetra { } } + /* // FIXME (mfh 07 Aug 2014) We need to relax this restriction, // since the future model will be allocation at construction, not // lazy allocation on first insert. @@ -2727,6 +2728,7 @@ namespace Tpetra { ((this->lclIndsUnpacked_wdv.extent (0) != 0 || this->gblInds_wdv.extent (0) != 0), std::runtime_error, "You may not call this method if 1-D data " "structures are already allocated."); + */ indicesAreAllocated_ = true; indicesAreLocal_ = true;