Skip to content

Commit

Permalink
MueLu: rm regionMG MatVec w/ double communication
Browse files Browse the repository at this point in the history
We have recently added a "FastMatVec", that precomputes the
communication patter for region MG MatVecs to replace the "old"
implementation, that performed two rounds of communication during a
MatVec. Now, we have enough confidence in the new implementation to get
rid of the old one.
  • Loading branch information
mayrmt committed Aug 12, 2020
1 parent 61cc54c commit b3b5f88
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 119 deletions.
35 changes: 2 additions & 33 deletions packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1014,19 +1014,13 @@ void vCycle(const int l, ///< ID of current level
tm = Teuchos::null;
tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 2 - compute residual")));

const bool useFastMatVec = smootherParams[l]->get<bool>("Use fast MatVec");

Array<RCP<Vector> > regRes(maxRegPerProc);
if(useCachedVectors) {
regRes = levelList.get<Teuchos::Array<RCP<Vector> > >("residual");
} else {
createRegionalVector(regRes, regRowMaps[l]);
}
if (useFastMatVec)
computeResidual(regRes, fineRegX, fineRegB, regMatrices[l], *smootherParams[l]);
else
computeResidual(regRes, fineRegX, fineRegB, regMatrices[l],
regRowMaps[l], regRowImporters[l]);
computeResidual(regRes, fineRegX, fineRegB, regMatrices[l], *smootherParams[l]);

tm = Teuchos::null;
tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 3 - scale interface")));
Expand All @@ -1039,7 +1033,7 @@ void vCycle(const int l, ///< ID of current level
// Transfer to coarse level
Array<RCP<Vector> > coarseRegX(maxRegPerProc);
Array<RCP<Vector> > coarseRegB(maxRegPerProc);
if (useFastMatVec)

{
// Get pre-communicated communication patterns for the fast MatVec
const ArrayRCP<LocalOrdinal> regionInterfaceLIDs = smootherParams[l+1]->get<ArrayRCP<LO>>("Fast MatVec: interface LIDs");
Expand All @@ -1055,22 +1049,6 @@ void vCycle(const int l, ///< ID of current level
// TEUCHOS_ASSERT(regProlong[l+1][j]->getDomainMap()->isSameAs(*coarseRegB[j]->getMap()));
}
}
else
{
for (int j = 0; j < maxRegPerProc; j++) {
coarseRegX[j] = VectorFactory::Build(regRowMaps[l+1][j], true);
coarseRegB[j] = VectorFactory::Build(regRowMaps[l+1][j], true);

regProlong[l+1][j]->apply(*regRes[j], *coarseRegB[j], Teuchos::TRANS);
// TEUCHOS_ASSERT(regProlong[l+1][j]->getRangeMap()->isSameAs(*regRes[j]->getMap()));
// TEUCHOS_ASSERT(regProlong[l+1][j]->getDomainMap()->isSameAs(*coarseRegB[j]->getMap()));
}

tm = Teuchos::null;
tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 5 - sum interface values")));

sumInterfaceValues(coarseRegB, regRowMaps[l+1], regRowImporters[l+1]);
}

tm = Teuchos::null;
bool coarseZeroInitGuess = true;
Expand All @@ -1085,7 +1063,6 @@ void vCycle(const int l, ///< ID of current level

// Transfer coarse level correction to fine level
Array<RCP<Vector> > regCorrection(maxRegPerProc);
if (useFastMatVec)
{
// Get pre-communicated communication patterns for the fast MatVec
const ArrayRCP<LocalOrdinal> regionInterfaceLIDs = smootherParams[l]->get<ArrayRCP<LO>>("Fast MatVec: interface LIDs");
Expand All @@ -1099,14 +1076,6 @@ void vCycle(const int l, ///< ID of current level
// TEUCHOS_ASSERT(regProlong[l+1][j]->getRangeMap()->isSameAs(*regCorrection[j]->getMap()));
}
}
else{
for (int j = 0; j < maxRegPerProc; j++) {
regCorrection[j] = VectorFactory::Build(regRowMaps[l][j], true);
regProlong[l+1][j]->apply(*coarseRegX[j], *regCorrection[j]);
// TEUCHOS_ASSERT(regProlong[l+1][j]->getDomainMap()->isSameAs(*coarseRegX[j]->getMap()));
// TEUCHOS_ASSERT(regProlong[l+1][j]->getRangeMap()->isSameAs(*regCorrection[j]->getMap()));
}
}

tm = Teuchos::null;
tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 7 - add coarse grid correction")));
Expand Down
51 changes: 0 additions & 51 deletions packages/muelu/research/regionMG/src/SetupRegionMatrix_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -719,57 +719,6 @@ void regionalToComposite(const std::vector<RCP<Xpetra::Matrix<Scalar, LocalOrdin
return;
} // regionalToComposite

/*! \brief Compute the residual \f$r = b - Ax\f$ using two rounds of communication
*
* The residual is computed based on matrices and vectors in a regional layout.
* 1. Compute y = A*x in regional layout.
* 2. Sum interface values of y to account for duplication of interface DOFs.
* 3. Compute r = b - y
*/
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
Array<RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > >
computeResidual(Array<RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > >& regRes, ///< residual (to be evaluated)
const Array<RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > > regX, ///< left-hand side (solution)
const Array<RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > > regB, ///< right-hand side (forcing term)
const std::vector<RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > > regionGrpMats,
const std::vector<RCP<Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node> > > revisedRowMapPerGrp, ///< revised row maps in region layout [in] (actually extracted from regionGrpMats)
const std::vector<RCP<Xpetra::Import<LocalOrdinal, GlobalOrdinal, Node> > > rowImportPerGrp ///< row importer in region layout [in]
)
{
#include "Xpetra_UseShortNames.hpp"
using Teuchos::TimeMonitor;
const int maxRegPerProc = regX.size();

RCP<TimeMonitor> tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("computeResidual: 1 - Rreg = Areg*Xreg")));

/* Update the residual vector
* 1. Compute tmp = A * regX in each region
* 2. Sum interface values in tmp due to duplication (We fake this by scaling to reverse the basic splitting)
* 3. Compute r = B - tmp
*/
for (int j = 0; j < maxRegPerProc; j++) { // step 1
regionGrpMats[j]->apply(*regX[j], *regRes[j]);
// TEUCHOS_ASSERT(regionGrpMats[j]->getDomainMap()->isSameAs(*regX[j]->getMap()));
// TEUCHOS_ASSERT(regionGrpMats[j]->getRangeMap()->isSameAs(*regRes[j]->getMap()));
}

tm = Teuchos::null;
tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("computeResidual: 2 - sumInterfaceValues")));

sumInterfaceValues(regRes, revisedRowMapPerGrp, rowImportPerGrp);

tm = Teuchos::null;
tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("computeResidual: 3 - Rreg = Breg - Rreg")));

for (int j = 0; j < maxRegPerProc; j++) { // step 3
regRes[j]->update(1.0, *regB[j], -1.0);
// TEUCHOS_ASSERT(regRes[j]->getMap()->isSameAs(*regB[j]->getMap()));
}

tm = Teuchos::null;

return regRes;
} // computeResidual

/*! \brief Compute local data needed to perform a MatVec in region format
Expand Down
28 changes: 8 additions & 20 deletions packages/muelu/research/regionMG/src/SetupRegionSmoothers_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,6 @@ void jacobiIterate(RCP<Teuchos::ParameterList> smootherParams,
const int maxIter = smootherParams->get<int> ("smoother: sweeps");
const double damping = smootherParams->get<double>("smoother: damping");
Array<RCP<Vector> > diag_inv = smootherParams->get<Array<RCP<Vector> > >("smoothers: inverse diagonal");
const bool useFastMatVec = smootherParams->get<bool>("Use fast MatVec");

Array<RCP<Vector> > regRes(maxRegPerProc);
createRegionalVector(regRes, revisedRowMapPerGrp);
Expand All @@ -168,10 +167,7 @@ void jacobiIterate(RCP<Teuchos::ParameterList> smootherParams,
}
}
else {
if (useFastMatVec)
computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams);
else
computeResidual(regRes, regX, regB, regionGrpMats, revisedRowMapPerGrp, rowImportPerGrp);
computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams);

// update solution according to Jacobi's method
for (int j = 0; j < maxRegPerProc; j++) {
Expand Down Expand Up @@ -212,7 +208,6 @@ void GSIterate(RCP<Teuchos::ParameterList> smootherParams,
const int maxIter = smootherParams->get<int>("smoother: sweeps");
const double damping = smootherParams->get<double>("smoother: damping");
Teuchos::Array<RCP<Vector> > diag_inv = smootherParams->get<Teuchos::Array<RCP<Vector> > >("smoothers: inverse diagonal");
const bool useFastMatVec = smootherParams->get<bool>("Use fast MatVec");

Array<RCP<Vector> > regRes(maxRegPerProc);
createRegionalVector(regRes, revisedRowMapPerGrp);
Expand All @@ -225,10 +220,7 @@ void GSIterate(RCP<Teuchos::ParameterList> smootherParams,
// Update the residual vector
if (!zeroInitGuess)
{
if (useFastMatVec)
computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams);
else
computeResidual(regRes, regX, regB, regionGrpMats, revisedRowMapPerGrp, rowImportPerGrp);
computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams);
}

// update the solution and the residual
Expand Down Expand Up @@ -441,7 +433,6 @@ void chebyshevIterate(RCP<Teuchos::ParameterList> smootherParams,
const Scalar lambdaMax = smootherParams->get<Scalar>("chebyshev: lambda max");
const Scalar boostFactor = smootherParams->get<double>("smoother: Chebyshev boost factor");
Teuchos::Array<RCP<Vector> > diag_inv = smootherParams->get<Teuchos::Array<RCP<Vector> > >("smoothers: inverse diagonal");
const bool useFastMatVec = smootherParams->get<bool>("Use fast MatVec");

// Define some constants for convenience
const Scalar SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
Expand Down Expand Up @@ -479,11 +470,10 @@ void chebyshevIterate(RCP<Teuchos::ParameterList> smootherParams,
regX[j]->update(SC_ONE, *regP[j], SC_ZERO); // X = 0 + P
}
}
else { // Compute residual vector
if (useFastMatVec)
computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams);
else
computeResidual(regRes, regX, regB, regionGrpMats, revisedRowMapPerGrp, rowImportPerGrp);
else {
// Compute residual vector
computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams);

for(int j = 0; j < maxRegPerProc; j++) {
regZ[j]->elementWiseMultiply(SC_ONE, *diag_inv[j], *regRes[j], SC_ZERO); // z = D_inv * R, that is, D \ R.
regP[j]->update(SC_ONE/theta, *regZ[j], SC_ZERO); // P = 1/theta Z
Expand All @@ -494,10 +484,8 @@ void chebyshevIterate(RCP<Teuchos::ParameterList> smootherParams,
// The rest of the iterations
for (int i = 1; i < maxIter; ++i) {
// Compute residual vector
if (useFastMatVec)
computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams);
else
computeResidual(regRes, regX, regB, regionGrpMats, revisedRowMapPerGrp, rowImportPerGrp);
computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams);

// z = D_inv * R, that is, D \ R.
for(int j = 0; j < maxRegPerProc; j++) {
regZ[j]->elementWiseMultiply(SC_ONE, *diag_inv[j], *regRes[j], SC_ZERO);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,6 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar
int cacheSize = 0; clp.setOption("cachesize", &cacheSize, "cache size (in KB)");
bool useStackedTimer = false; clp.setOption("stacked-timer","no-stacked-timer", &useStackedTimer, "use stacked timer");
bool showTimerSummary = true; clp.setOption("show-timer-summary", "no-show-timer-summary", &showTimerSummary, "Switch on/off the timer summary at the end of the run.");
bool useFastMatVec = true; clp.setOption("fastMV", "no-fastMV", &useFastMatVec, "Use the fast MatVec implementation (or not)");
std::string cycleType = "V"; clp.setOption("cycleType", &cycleType, "{Multigrid cycle type. Possible values: V, W.");

clp.recogniseAllOptions(true);
Expand Down Expand Up @@ -730,7 +729,6 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar
// Set data for fast MatVec
// for (auto levelSmootherParams : smootherParams) {
for(LO levelIdx = 0; levelIdx < numLevels; ++levelIdx) {
smootherParams[levelIdx]->set("Use fast MatVec", useFastMatVec);
smootherParams[levelIdx]->set("Fast MatVec: interface LIDs",
regionMatVecLIDsPerLevel[levelIdx]);
smootherParams[levelIdx]->set("Fast MatVec: interface importer",
Expand Down Expand Up @@ -858,19 +856,8 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar
////////////////////////////////////////////////////////////////////////
// SWITCH BACK TO NON-LEVEL VARIABLES
////////////////////////////////////////////////////////////////////////
{
if (useFastMatVec)
{
computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams[0]);
}
else
{
computeResidual(regRes, regX, regB, regionGrpMats,
revisedRowMapPerGrp, rowImportPerGrp);
}

scaleInterfaceDOFs(regRes, regInterfaceScalings[0], true);
}
computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams[0]);
scaleInterfaceDOFs(regRes, regInterfaceScalings[0], true);

compRes = VectorFactory::Build(dofMap, true);
regionalToComposite(regRes, compRes, rowImportPerGrp);
Expand Down

0 comments on commit b3b5f88

Please sign in to comment.