Skip to content

Commit

Permalink
Ifpack2: AdditiveSchwarz matrix filtering in parallel
Browse files Browse the repository at this point in the history
Add a fast path ("Details::AdditiveSchwarzFilter") where LocalFilter,
SingletonFilter and ReorderFilter are applied all at once on device.
This benefits initialize, compute and apply, since the old path only
supported host matrix access with getLocalRowCopy, and each filter pass
did its own copy. The old path is still in place to support non-CrsMatrix
inputs to AdditiveSchwarz.

Also fix #9606 by having AdditiveSchwarz::compute re-import the halo
rows of the OverlappingRowMatrix (ExtMatrix_).
  • Loading branch information
brian-kelley committed Jul 12, 2022
1 parent d673571 commit c7fe1f6
Show file tree
Hide file tree
Showing 15 changed files with 1,495 additions and 188 deletions.
1 change: 1 addition & 0 deletions packages/ifpack2/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ IF(Ifpack2_ENABLE_EXPLICIT_INSTANTIATION)
SparsityFilter
ContainerFactory
TriDiContainer
Details::AdditiveSchwarzFilter
Details::Chebyshev
Details::ChebyshevKernel
Details::DenseSolver
Expand Down
9 changes: 8 additions & 1 deletion packages/ifpack2/src/Ifpack2_AdditiveSchwarz_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
#include "Ifpack2_Preconditioner.hpp"
#include "Ifpack2_Details_CanChangeMatrix.hpp"
#include "Ifpack2_Details_NestedPreconditioner.hpp"
#include "Ifpack2_OverlappingRowMatrix.hpp"
#include "Tpetra_Map.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Tpetra_RowMatrix.hpp"
Expand Down Expand Up @@ -329,6 +330,12 @@ class AdditiveSchwarz :
using row_matrix_type =
Tpetra::RowMatrix<scalar_type, local_ordinal_type,
global_ordinal_type, node_type>;

//! The Tpetra::CrsMatrix specialization that is a subclass of MatrixType.
using crs_matrix_type =
Tpetra::CrsMatrix<scalar_type, local_ordinal_type,
global_ordinal_type, node_type>;

//@}
// \name Constructors and destructor
//@{
Expand Down Expand Up @@ -774,7 +781,7 @@ class AdditiveSchwarz :
/// \brief The overlapping matrix.
///
/// If nonnull, this is an instance of OverlappingRowMatrix<row_matrix_type>.
Teuchos::RCP<row_matrix_type> OverlappingMatrix_;
Teuchos::RCP<OverlappingRowMatrix<row_matrix_type>> OverlappingMatrix_;

//! The reordered matrix.
Teuchos::RCP<row_matrix_type> ReorderedLocalizedMatrix_;
Expand Down
454 changes: 297 additions & 157 deletions packages/ifpack2/src/Ifpack2_AdditiveSchwarz_def.hpp

Large diffs are not rendered by default.

412 changes: 412 additions & 0 deletions packages/ifpack2/src/Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp

Large diffs are not rendered by default.

729 changes: 729 additions & 0 deletions packages/ifpack2/src/Ifpack2_Details_AdditiveSchwarzFilter_def.hpp

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions packages/ifpack2/src/Ifpack2_IlukGraph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ void IlukGraph<GraphType, KKHandleType>::initialize()

// Get Maximum Row length
const int MaxNumIndices = OverlapGraph_->getLocalMaxNumRowEntries ();
std::cout << "IlukGraph: OverlapGraph says max num row entries is " << MaxNumIndices << '\n';

// FIXME (mfh 23 Dec 2013) Use size_t or whatever
// getLocalNumElements() returns, instead of ptrdiff_t.
Expand Down
12 changes: 7 additions & 5 deletions packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ class OverlappingRowMatrix :

using row_matrix_type = Tpetra::RowMatrix<scalar_type, local_ordinal_type,
global_ordinal_type, node_type>;
using crs_matrix_type = Tpetra::CrsMatrix<scalar_type, local_ordinal_type,
global_ordinal_type, node_type>;

// device typedefs
typedef typename MatrixType::node_type::device_type device_type;
Expand All @@ -83,8 +85,6 @@ class OverlappingRowMatrix :
typedef typename MatrixType::global_inds_device_view_type global_inds_device_view_type;
typedef typename MatrixType::values_device_view_type values_device_view_type;

typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> crs_matrix_type;

static_assert(std::is_same<MatrixType, row_matrix_type>::value, "Ifpack2::OverlappingRowMatrix: The template parameter MatrixType must be a Tpetra::RowMatrix specialization. Please don't use Tpetra::CrsMatrix (a subclass of Tpetra::RowMatrix) here anymore. The constructor can take either a RowMatrix or a CrsMatrix just fine.");

typedef typename row_matrix_type::mag_type mag_type;
Expand Down Expand Up @@ -350,13 +350,15 @@ class OverlappingRowMatrix :

void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const;

virtual Teuchos::RCP<const row_matrix_type> getUnderlyingMatrix() const;
Teuchos::RCP<const crs_matrix_type> getUnderlyingMatrix() const;

const typename crs_matrix_type::local_matrix_device_type getExtMatrix() const;
Teuchos::RCP<const crs_matrix_type> getExtMatrix() const;

Kokkos::View<size_t*, typename OverlappingRowMatrix<MatrixType>::device_type> getExtHaloStarts() const;
typename Kokkos::View<size_t*, typename OverlappingRowMatrix<MatrixType>::device_type>::HostMirror getExtHaloStartsHost() const;

void doExtImport();

private:
typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
typedef Tpetra::Import<local_ordinal_type, global_ordinal_type, node_type> import_type;
Expand All @@ -378,7 +380,7 @@ class OverlappingRowMatrix :
Teuchos::RCP<const import_type> Importer_;

//! The matrix containing only the overlap rows.
Teuchos::RCP<const crs_matrix_type> ExtMatrix_;
Teuchos::RCP<crs_matrix_type> ExtMatrix_;
Teuchos::RCP<const map_type> ExtMap_;
Teuchos::RCP<const import_type> ExtImporter_;
Kokkos::View<size_t*, device_type> ExtHaloStarts_;
Expand Down
27 changes: 19 additions & 8 deletions packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ OverlappingRowMatrix (const Teuchos::RCP<const row_matrix_type>& A,
Array<global_ordinal_type> mylist (size);
size_t count = 0;

{

Teuchos::TimeMonitor t(*Teuchos::TimeMonitor::getNewTimer("OverlappingRowMatrix n^2 loop??"));

// define the set of rows that are in ColMap but not in RowMap
for (local_ordinal_type i = 0 ; (size_t) i < ColMap->getLocalNumElements() ; ++i) {
const global_ordinal_type GID = ColMap->getGlobalElement (i);
Expand All @@ -129,6 +133,7 @@ OverlappingRowMatrix (const Teuchos::RCP<const row_matrix_type>& A,
}
}
}
}

// On last import round, TmpMap, TmpGraph, and TmpImporter are unneeded,
// so don't build them.
Expand Down Expand Up @@ -174,11 +179,9 @@ OverlappingRowMatrix (const Teuchos::RCP<const row_matrix_type>& A,
ExtImporter_ = rcp (new import_type (A_->getRowMap (), ExtMap_));

{
RCP<crs_matrix_type> ExtMatrix_nc =
rcp (new crs_matrix_type (ExtMap_, ColMap_, 0));
ExtMatrix_nc->doImport (*A_, *ExtImporter_, Tpetra::INSERT);
ExtMatrix_nc->fillComplete (A_->getDomainMap (), RowMap_);
ExtMatrix_ = ExtMatrix_nc; // we only need the const version after here
ExtMatrix_ = rcp (new crs_matrix_type (ExtMap_, ColMap_, 0));
ExtMatrix_->doImport (*A_, *ExtImporter_, Tpetra::INSERT);
ExtMatrix_->fillComplete (A_->getDomainMap (), RowMap_);
}

// fix indices for overlapping matrix
Expand Down Expand Up @@ -801,17 +804,17 @@ void OverlappingRowMatrix<MatrixType>::describe(Teuchos::FancyOStream &out,
}

template<class MatrixType>
Teuchos::RCP<const Tpetra::RowMatrix<typename MatrixType::scalar_type, typename MatrixType::local_ordinal_type, typename MatrixType::global_ordinal_type, typename MatrixType::node_type> >
Teuchos::RCP<const Tpetra::CrsMatrix<typename MatrixType::scalar_type, typename MatrixType::local_ordinal_type, typename MatrixType::global_ordinal_type, typename MatrixType::node_type> >
OverlappingRowMatrix<MatrixType>::getUnderlyingMatrix() const
{
return A_;
}

template<class MatrixType>
const typename OverlappingRowMatrix<MatrixType>::crs_matrix_type::local_matrix_device_type
Teuchos::RCP<const Tpetra::CrsMatrix<typename MatrixType::scalar_type, typename MatrixType::local_ordinal_type, typename MatrixType::global_ordinal_type, typename MatrixType::node_type> >
OverlappingRowMatrix<MatrixType>::getExtMatrix() const
{
return ExtMatrix_->getLocalMatrixDevice();
return ExtMatrix_;
}

template<class MatrixType>
Expand All @@ -828,6 +831,14 @@ OverlappingRowMatrix<MatrixType>::getExtHaloStartsHost() const
return ExtHaloStarts_h;
}

template<class MatrixType>
void OverlappingRowMatrix<MatrixType>::doExtImport()
{
ExtMatrix_ = rcp (new crs_matrix_type (ExtMap_, ColMap_, 0));
ExtMatrix_->doImport (*A_, *ExtImporter_, Tpetra::INSERT);
ExtMatrix_->fillComplete (A_->getDomainMap (), RowMap_);
}

} // namespace Ifpack2

#define IFPACK2_OVERLAPPINGROWMATRIX_INSTANT(S,LO,GO,N) \
Expand Down
9 changes: 9 additions & 0 deletions packages/ifpack2/src/Ifpack2_RILUK_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,7 @@ RILUK<MatrixType>::makeLocalFilter (const Teuchos::RCP<const row_matrix_type>& A
// directly.
if (A->getRowMap ()->getComm ()->getSize () == 1 ||
A->getRowMap ()->isSameAs (* (A->getColMap ()))) {
std::cout << "RILUK: using input A directly because it's already local.\n";
return A;
}

Expand Down Expand Up @@ -536,6 +537,10 @@ void RILUK<MatrixType>::initialize ()
A_local_crs_nc->fillComplete (A_local_->getDomainMap (), A_local_->getRangeMap ());
A_local_crs = rcp_const_cast<const crs_matrix_type> (A_local_crs_nc);
}
else
{
std::cout << "RILUK initialize: not creating A_local_crs here because A is already a CrsMatrix.\n";
}
Graph_ = rcp (new Ifpack2::IlukGraph<crs_graph_type,kk_handle_type> (A_local_crs->getCrsGraph (),
LevelOfFill_, 0, Overalloc_));
}
Expand Down Expand Up @@ -930,6 +935,10 @@ void RILUK<MatrixType>::compute ()
A_local_crs_nc->fillComplete (A_local_->getDomainMap (), A_local_->getRangeMap ());
A_local_crs = rcp_const_cast<const crs_matrix_type> (A_local_crs_nc);
}
else
{
std::cout << "RILUK compute: not creating A_local_crs here because A is already a CrsMatrix.\n";
}
auto lclMtx = A_local_crs->getLocalMatrixDevice();
A_local_rowmap_ = lclMtx.graph.row_map;
A_local_entries_ = lclMtx.graph.entries;
Expand Down
4 changes: 4 additions & 0 deletions packages/ifpack2/test/belos/build_precond.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,16 +95,20 @@ build_precond (Teuchos::ParameterList& test_params,
OSTab tab (*out);
prec->setParameters (tif_params);
{
std::cout << "Driver: starting precond init (rank " << myRank << ")" << std::endl;
Teuchos::TimeMonitor timeMon (timer_init);
prec->initialize ();
std::cout << "Driver: done with precond init (rank " << myRank << ")" << std::endl;
comm->barrier();
}
if (myRank == 0) {
*out << "Time Init: " << timer_init.totalElapsedTime() << endl;
}
{
std::cout << "Driver: starting precond compute (rank " << myRank << ")" << std::endl;
Teuchos::TimeMonitor timeMon (timer);
prec->compute ();
std::cout << "Driver: done with precond compute (rank " << myRank << ")" << std::endl;
comm->barrier();
}
if (myRank == 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,11 @@ TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL(Ifpack2AdditiveSchwarz, Test0, Scalar, LocalOr
params.set ("schwarz: combine mode", "Zero");

#if defined(HAVE_IFPACK2_XPETRA) && defined(HAVE_IFPACK2_ZOLTAN2)
std::cout << "Test0: Enabling reordering!\n";
params.set ("schwarz: use reordering", true);
params.set ("schwarz: reordering list", zlist);
#else
std::cout << "Test0: NOT enabling reordering!\n";
params.set ("schwarz: use reordering", false);
#endif

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,8 @@ localSolve (Tpetra::MultiVector<
using Teuchos::CONJ_TRANS;
using Teuchos::NO_TRANS;
using Teuchos::TRANS;
using MV = Tpetra::MultiVector<
typename CrsMatrixType::scalar_type,
typename CrsMatrixType::local_ordinal_type,
typename CrsMatrixType::global_ordinal_type,
typename CrsMatrixType::node_type>;
using scalar_type = typename CrsMatrixType::scalar_type;
using STS = Teuchos::ScalarTraits<scalar_type>;
using device_type = typename CrsMatrixType::device_type;
using dev_memory_space = typename device_type::memory_space;
const char prefix[] = "localSolve: ";

TEUCHOS_TEST_FOR_EXCEPTION
Expand Down Expand Up @@ -999,12 +992,6 @@ void testArrowMatrix (bool& success, Teuchos::FancyOStream& out)
L,U,out);
if(!gblSuccess) return;

typedef typename crs_matrix_type::local_graph_device_type local_graph_type;
typedef typename crs_matrix_type::local_matrix_device_type local_matrix_type;
typedef typename local_matrix_type::row_map_type::non_const_type row_offsets_type;
typedef typename local_graph_type::entries_type::non_const_type col_inds_type;
typedef typename local_matrix_type::values_type::non_const_type values_type;

typedef typename crs_matrix_type::local_inds_host_view_type const_local_inds_type;
typedef typename crs_matrix_type::values_host_view_type const_values_type;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ void reducedMatvec(const OverlappedMatrixClass & A,
throw std::runtime_error("reducedMatvec: Exceeded available overlap");

auto undA_lcl = undA->getLocalMatrixDevice ();
auto extA_lcl = A.getExtMatrix();
auto extA_lcl = A.getExtMatrix()->getLocalMatrixDevice();
auto X_lcl = X.getLocalViewDevice (Tpetra::Access::ReadOnly);
auto Y_lcl = Y.getLocalViewDevice (Tpetra::Access::OverwriteAll);

Expand All @@ -242,7 +242,7 @@ void reducedMatvec(const OverlappedMatrixClass & A,
int yrange = hstarts[overlapLevel];
auto Y_ext = Kokkos::subview(Y_lcl,std::make_pair(numLocalRows,numLocalRows+yrange),Kokkos::ALL());

int xlimit = ( (overlapLevel == hstarts.size()-1) ? X_lcl.extent(0) : numLocalRows+hstarts[overlapLevel+1] );
int xlimit = ( (overlapLevel == (int) hstarts.size()-1) ? X_lcl.extent(0) : numLocalRows+hstarts[overlapLevel+1] );
auto X_ext = Kokkos::subview(X_lcl,std::make_pair(0,xlimit),Kokkos::ALL());

localReducedMatvec(extA_lcl,X_ext,yrange,Y_ext);
Expand Down
4 changes: 2 additions & 2 deletions packages/tpetra/core/src/Tpetra_CrsGraph_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1326,7 +1326,7 @@ namespace Tpetra {
///
/// \pre columnIndices are sorted within rows
/// \pre <tt>hasColMap() == true</tt>
/// \pre <tt>rowPointers.size() != getLocalNumRows()+1</tt>
/// \pre <tt>rowPointers.size() == getLocalNumRows()+1</tt>
/// \pre No insert routines have been called.
///
/// \warning This method is intended for expert developer use
Expand All @@ -1339,7 +1339,7 @@ namespace Tpetra {
///
/// \pre columnIndices are sorted within rows
/// \pre <tt>hasColMap() == true</tt>
/// \pre <tt>rowPointers.size() != getLocalNumRows()+1</tt>
/// \pre <tt>rowPointers.size() == getLocalNumRows()+1</tt>
/// \pre No insert routines have been called.
///
/// \warning This method is intended for expert developer use
Expand Down
2 changes: 2 additions & 0 deletions packages/tpetra/core/src/Tpetra_CrsGraph_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2720,13 +2720,15 @@ namespace Tpetra {
}
}

/*
// FIXME (mfh 07 Aug 2014) We need to relax this restriction,
// since the future model will be allocation at construction, not
// lazy allocation on first insert.
TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
((this->lclIndsUnpacked_wdv.extent (0) != 0 || this->gblInds_wdv.extent (0) != 0),
std::runtime_error, "You may not call this method if 1-D data "
"structures are already allocated.");
*/

indicesAreAllocated_ = true;
indicesAreLocal_ = true;
Expand Down

0 comments on commit c7fe1f6

Please sign in to comment.