Skip to content

Commit

Permalink
MueLu: rm regionMG MatVec w/ double communication
Browse files Browse the repository at this point in the history
We have recently added a "FastMatVec", that precomputes the
communication patter for region MG MatVecs to replace the "old"
implementation, that performed two rounds of communication during a
MatVec. Now, we have enough confidence in the new implementation to get
rid of the old one.
  • Loading branch information
mayrmt committed Aug 12, 2020
1 parent f940ae0 commit d8461b0
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 217 deletions.
35 changes: 2 additions & 33 deletions packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1014,19 +1014,13 @@ void vCycle(const int l, ///< ID of current level
tm = Teuchos::null;
tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 2 - compute residual")));

const bool useFastMatVec = smootherParams[l]->get<bool>("Use fast MatVec");

Array<RCP<Vector> > regRes(maxRegPerProc);
if(useCachedVectors) {
regRes = levelList.get<Teuchos::Array<RCP<Vector> > >("residual");
} else {
createRegionalVector(regRes, regRowMaps[l]);
}
if (useFastMatVec)
computeResidual(regRes, fineRegX, fineRegB, regMatrices[l], *smootherParams[l]);
else
computeResidual(regRes, fineRegX, fineRegB, regMatrices[l],
regRowMaps[l], regRowImporters[l]);
computeResidual(regRes, fineRegX, fineRegB, regMatrices[l], *smootherParams[l]);

tm = Teuchos::null;
tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 3 - scale interface")));
Expand All @@ -1039,7 +1033,7 @@ void vCycle(const int l, ///< ID of current level
// Transfer to coarse level
Array<RCP<Vector> > coarseRegX(maxRegPerProc);
Array<RCP<Vector> > coarseRegB(maxRegPerProc);
if (useFastMatVec)

{
// Get pre-communicated communication patterns for the fast MatVec
const ArrayRCP<LocalOrdinal> regionInterfaceLIDs = smootherParams[l+1]->get<ArrayRCP<LO>>("Fast MatVec: interface LIDs");
Expand All @@ -1055,22 +1049,6 @@ void vCycle(const int l, ///< ID of current level
// TEUCHOS_ASSERT(regProlong[l+1][j]->getDomainMap()->isSameAs(*coarseRegB[j]->getMap()));
}
}
else
{
for (int j = 0; j < maxRegPerProc; j++) {
coarseRegX[j] = VectorFactory::Build(regRowMaps[l+1][j], true);
coarseRegB[j] = VectorFactory::Build(regRowMaps[l+1][j], true);

regProlong[l+1][j]->apply(*regRes[j], *coarseRegB[j], Teuchos::TRANS);
// TEUCHOS_ASSERT(regProlong[l+1][j]->getRangeMap()->isSameAs(*regRes[j]->getMap()));
// TEUCHOS_ASSERT(regProlong[l+1][j]->getDomainMap()->isSameAs(*coarseRegB[j]->getMap()));
}

tm = Teuchos::null;
tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 5 - sum interface values")));

sumInterfaceValues(coarseRegB, regRowMaps[l+1], regRowImporters[l+1]);
}

tm = Teuchos::null;
bool coarseZeroInitGuess = true;
Expand All @@ -1085,7 +1063,6 @@ void vCycle(const int l, ///< ID of current level

// Transfer coarse level correction to fine level
Array<RCP<Vector> > regCorrection(maxRegPerProc);
if (useFastMatVec)
{
// Get pre-communicated communication patterns for the fast MatVec
const ArrayRCP<LocalOrdinal> regionInterfaceLIDs = smootherParams[l]->get<ArrayRCP<LO>>("Fast MatVec: interface LIDs");
Expand All @@ -1099,14 +1076,6 @@ void vCycle(const int l, ///< ID of current level
// TEUCHOS_ASSERT(regProlong[l+1][j]->getRangeMap()->isSameAs(*regCorrection[j]->getMap()));
}
}
else{
for (int j = 0; j < maxRegPerProc; j++) {
regCorrection[j] = VectorFactory::Build(regRowMaps[l][j], true);
regProlong[l+1][j]->apply(*coarseRegX[j], *regCorrection[j]);
// TEUCHOS_ASSERT(regProlong[l+1][j]->getDomainMap()->isSameAs(*coarseRegX[j]->getMap()));
// TEUCHOS_ASSERT(regProlong[l+1][j]->getRangeMap()->isSameAs(*regCorrection[j]->getMap()));
}
}

tm = Teuchos::null;
tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 7 - add coarse grid correction")));
Expand Down
51 changes: 0 additions & 51 deletions packages/muelu/research/regionMG/src/SetupRegionMatrix_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -719,57 +719,6 @@ void regionalToComposite(const std::vector<RCP<Xpetra::Matrix<Scalar, LocalOrdin
return;
} // regionalToComposite

/*! \brief Compute the residual \f$r = b - Ax\f$ using two rounds of communication
*
* The residual is computed based on matrices and vectors in a regional layout.
* 1. Compute y = A*x in regional layout.
* 2. Sum interface values of y to account for duplication of interface DOFs.
* 3. Compute r = b - y
*/
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
Array<RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > >
computeResidual(Array<RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > >& regRes, ///< residual (to be evaluated)
const Array<RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > > regX, ///< left-hand side (solution)
const Array<RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > > regB, ///< right-hand side (forcing term)
const std::vector<RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > > regionGrpMats,
const std::vector<RCP<Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node> > > revisedRowMapPerGrp, ///< revised row maps in region layout [in] (actually extracted from regionGrpMats)
const std::vector<RCP<Xpetra::Import<LocalOrdinal, GlobalOrdinal, Node> > > rowImportPerGrp ///< row importer in region layout [in]
)
{
#include "Xpetra_UseShortNames.hpp"
using Teuchos::TimeMonitor;
const int maxRegPerProc = regX.size();

RCP<TimeMonitor> tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("computeResidual: 1 - Rreg = Areg*Xreg")));

/* Update the residual vector
* 1. Compute tmp = A * regX in each region
* 2. Sum interface values in tmp due to duplication (We fake this by scaling to reverse the basic splitting)
* 3. Compute r = B - tmp
*/
for (int j = 0; j < maxRegPerProc; j++) { // step 1
regionGrpMats[j]->apply(*regX[j], *regRes[j]);
// TEUCHOS_ASSERT(regionGrpMats[j]->getDomainMap()->isSameAs(*regX[j]->getMap()));
// TEUCHOS_ASSERT(regionGrpMats[j]->getRangeMap()->isSameAs(*regRes[j]->getMap()));
}

tm = Teuchos::null;
tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("computeResidual: 2 - sumInterfaceValues")));

sumInterfaceValues(regRes, revisedRowMapPerGrp, rowImportPerGrp);

tm = Teuchos::null;
tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("computeResidual: 3 - Rreg = Breg - Rreg")));

for (int j = 0; j < maxRegPerProc; j++) { // step 3
regRes[j]->update(1.0, *regB[j], -1.0);
// TEUCHOS_ASSERT(regRes[j]->getMap()->isSameAs(*regB[j]->getMap()));
}

tm = Teuchos::null;

return regRes;
} // computeResidual

/*! \brief Compute local data needed to perform a MatVec in region format
Expand Down
28 changes: 8 additions & 20 deletions packages/muelu/research/regionMG/src/SetupRegionSmoothers_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,6 @@ void jacobiIterate(RCP<Teuchos::ParameterList> smootherParams,
const int maxIter = smootherParams->get<int> ("smoother: sweeps");
const double damping = smootherParams->get<double>("smoother: damping");
Array<RCP<Vector> > diag_inv = smootherParams->get<Array<RCP<Vector> > >("smoothers: inverse diagonal");
const bool useFastMatVec = smootherParams->get<bool>("Use fast MatVec");

Array<RCP<Vector> > regRes(maxRegPerProc);
createRegionalVector(regRes, revisedRowMapPerGrp);
Expand All @@ -168,10 +167,7 @@ void jacobiIterate(RCP<Teuchos::ParameterList> smootherParams,
}
}
else {
if (useFastMatVec)
computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams);
else
computeResidual(regRes, regX, regB, regionGrpMats, revisedRowMapPerGrp, rowImportPerGrp);
computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams);

// update solution according to Jacobi's method
for (int j = 0; j < maxRegPerProc; j++) {
Expand Down Expand Up @@ -212,7 +208,6 @@ void GSIterate(RCP<Teuchos::ParameterList> smootherParams,
const int maxIter = smootherParams->get<int>("smoother: sweeps");
const double damping = smootherParams->get<double>("smoother: damping");
Teuchos::Array<RCP<Vector> > diag_inv = smootherParams->get<Teuchos::Array<RCP<Vector> > >("smoothers: inverse diagonal");
const bool useFastMatVec = smootherParams->get<bool>("Use fast MatVec");

Array<RCP<Vector> > regRes(maxRegPerProc);
createRegionalVector(regRes, revisedRowMapPerGrp);
Expand All @@ -225,10 +220,7 @@ void GSIterate(RCP<Teuchos::ParameterList> smootherParams,
// Update the residual vector
if (!zeroInitGuess)
{
if (useFastMatVec)
computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams);
else
computeResidual(regRes, regX, regB, regionGrpMats, revisedRowMapPerGrp, rowImportPerGrp);
computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams);
}

// update the solution and the residual
Expand Down Expand Up @@ -441,7 +433,6 @@ void chebyshevIterate(RCP<Teuchos::ParameterList> smootherParams,
const Scalar lambdaMax = smootherParams->get<Scalar>("chebyshev: lambda max");
const Scalar boostFactor = smootherParams->get<double>("smoother: Chebyshev boost factor");
Teuchos::Array<RCP<Vector> > diag_inv = smootherParams->get<Teuchos::Array<RCP<Vector> > >("smoothers: inverse diagonal");
const bool useFastMatVec = smootherParams->get<bool>("Use fast MatVec");

// Define some constants for convenience
const Scalar SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
Expand Down Expand Up @@ -479,11 +470,10 @@ void chebyshevIterate(RCP<Teuchos::ParameterList> smootherParams,
regX[j]->update(SC_ONE, *regP[j], SC_ZERO); // X = 0 + P
}
}
else { // Compute residual vector
if (useFastMatVec)
computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams);
else
computeResidual(regRes, regX, regB, regionGrpMats, revisedRowMapPerGrp, rowImportPerGrp);
else {
// Compute residual vector
computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams);

for(int j = 0; j < maxRegPerProc; j++) {
regZ[j]->elementWiseMultiply(SC_ONE, *diag_inv[j], *regRes[j], SC_ZERO); // z = D_inv * R, that is, D \ R.
regP[j]->update(SC_ONE/theta, *regZ[j], SC_ZERO); // P = 1/theta Z
Expand All @@ -494,10 +484,8 @@ void chebyshevIterate(RCP<Teuchos::ParameterList> smootherParams,
// The rest of the iterations
for (int i = 1; i < maxIter; ++i) {
// Compute residual vector
if (useFastMatVec)
computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams);
else
computeResidual(regRes, regX, regB, regionGrpMats, revisedRowMapPerGrp, rowImportPerGrp);
computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams);

// z = D_inv * R, that is, D \ R.
for(int j = 0; j < maxRegPerProc; j++) {
regZ[j]->elementWiseMultiply(SC_ONE, *diag_inv[j], *regRes[j], SC_ZERO);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,6 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar
int cacheSize = 0; clp.setOption("cachesize", &cacheSize, "cache size (in KB)");
bool useStackedTimer = false; clp.setOption("stacked-timer","no-stacked-timer", &useStackedTimer, "use stacked timer");
bool showTimerSummary = true; clp.setOption("show-timer-summary", "no-show-timer-summary", &showTimerSummary, "Switch on/off the timer summary at the end of the run.");
bool useFastMatVec = true; clp.setOption("fastMV", "no-fastMV", &useFastMatVec, "Use the fast MatVec implementation (or not)");
std::string cycleType = "V"; clp.setOption("cycleType", &cycleType, "{Multigrid cycle type. Possible values: V, W.");

clp.recogniseAllOptions(true);
Expand Down Expand Up @@ -730,7 +729,6 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar
// Set data for fast MatVec
// for (auto levelSmootherParams : smootherParams) {
for(LO levelIdx = 0; levelIdx < numLevels; ++levelIdx) {
smootherParams[levelIdx]->set("Use fast MatVec", useFastMatVec);
smootherParams[levelIdx]->set("Fast MatVec: interface LIDs",
regionMatVecLIDsPerLevel[levelIdx]);
smootherParams[levelIdx]->set("Fast MatVec: interface importer",
Expand Down Expand Up @@ -858,19 +856,8 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar
////////////////////////////////////////////////////////////////////////
// SWITCH BACK TO NON-LEVEL VARIABLES
////////////////////////////////////////////////////////////////////////
{
if (useFastMatVec)
{
computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams[0]);
}
else
{
computeResidual(regRes, regX, regB, regionGrpMats,
revisedRowMapPerGrp, rowImportPerGrp);
}

scaleInterfaceDOFs(regRes, regInterfaceScalings[0], true);
}
computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams[0]);
scaleInterfaceDOFs(regRes, regInterfaceScalings[0], true);

compRes = VectorFactory::Build(dofMap, true);
regionalToComposite(regRes, compRes, rowImportPerGrp);
Expand Down
98 changes: 0 additions & 98 deletions packages/muelu/test/unit_tests/RegionMatrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -645,103 +645,6 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(RegionMatrix, RegionToCompositeMatrix, Scalar,

} // RegionToCompositeMatrix

// This test aims at checking that apply regionA to a region vector has the same effect as
// applying A to a composite vector. Of course the region vector needs to be brought back
// to composite formate before verifying the equivalence.
TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(RegionMatrix, MatVec, Scalar, LocalOrdinal, GlobalOrdinal, Node)
{
# include "MueLu_UseShortNames.hpp"
MUELU_TESTING_SET_OSTREAM;
MUELU_TESTING_LIMIT_SCOPE(Scalar,GlobalOrdinal,Node);

using TST = Teuchos::ScalarTraits<SC>;
using magnitude_type = typename TST::magnitudeType;
using TMT = Teuchos::ScalarTraits<magnitude_type>;
using real_type = typename TST::coordinateType;
using RealValuedMultiVector = Xpetra::MultiVector<real_type,LO,GO,NO>;
using test_factory = TestHelpers::TestFactory<SC, LO, GO, NO>;

out << "version: " << MueLu::Version() << std::endl;

// Get MPI parameter
RCP<const Teuchos::Comm<int> > comm = TestHelpers::Parameters::getDefaultComm();

GO nx = 5, ny = 5, nz = 1;
Teuchos::CommandLineProcessor &clp = Teuchos::UnitTestRepository::getCLP();
Galeri::Xpetra::Parameters<GO> galeriParameters(clp, nx, ny, nz, "Laplace2D");
Teuchos::ParameterList galeriList = galeriParameters.GetParameterList();
std::string matrixType = galeriParameters.GetMatrixType();

// Build maps for the problem
const LO numDofsPerNode = 1;
RCP<Map> nodeMap = Galeri::Xpetra::CreateMap<LO, GO, Node>(TestHelpers::Parameters::getLib(),
"Cartesian2D", comm, galeriList);
RCP<Map> dofMap = Xpetra::MapFactory<LO,GO,Node>::Build(nodeMap, numDofsPerNode);

// Build the Xpetra problem
RCP<Galeri::Xpetra::Problem<Map,CrsMatrixWrap,MultiVector> > Pr =
Galeri::Xpetra::BuildProblem<SC,LO,GO,Map,CrsMatrixWrap,MultiVector>(galeriParameters.GetMatrixType(), dofMap, galeriList);

// Generate the operator
RCP<Matrix> A = Pr->BuildMatrix();
A->SetFixedBlockSize(numDofsPerNode);

// Create auxiliary data for MG
RCP<MultiVector> nullspace = Pr->BuildNullspace();
RCP<RealValuedMultiVector> coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<double,LO,GO,Map,RealValuedMultiVector>("2D", nodeMap, galeriList);

// create the region maps, importer and operator from composite counter parts
const int maxRegPerProc = 1;
std::vector<RCP<Map> > rowMapPerGrp(maxRegPerProc), colMapPerGrp(maxRegPerProc);
std::vector<RCP<Map> > revisedRowMapPerGrp(maxRegPerProc), revisedColMapPerGrp(maxRegPerProc);
std::vector<RCP<Import> > rowImportPerGrp(maxRegPerProc), colImportPerGrp(maxRegPerProc);
std::vector<RCP<Matrix> > regionGrpMats(maxRegPerProc);
Teuchos::ArrayRCP<LO> regionMatVecLIDs;
RCP<Import> regionInterfaceImporter;
createRegionMatrix(galeriList, numDofsPerNode, maxRegPerProc, nodeMap, dofMap, A,
rowMapPerGrp, colMapPerGrp, revisedRowMapPerGrp, revisedColMapPerGrp,
rowImportPerGrp, colImportPerGrp, regionGrpMats,
regionMatVecLIDs, regionInterfaceImporter);

RCP<Matrix> regionMat = regionGrpMats[0];

// Create initial vectors in composite format and apply composite A.
// This will give a reference to compare with.
RCP<Vector> X = VectorFactory::Build(dofMap);
RCP<Vector> B = VectorFactory::Build(dofMap);

// we set seed for reproducibility
Utilities::SetRandomSeed(*comm);
X->randomize();

// Perform composite MatVec
A->apply(*X, *B, Teuchos::NO_TRANS, TST::one(), TST::zero());

// Create the region vectors and apply region A
Array<RCP<Vector> > quasiRegX(maxRegPerProc);
Array<RCP<Vector> > quasiRegB(maxRegPerProc);
Array<RCP<Vector> > regX(maxRegPerProc);
Array<RCP<Vector> > regB(maxRegPerProc);
compositeToRegional(X, quasiRegX, regX,
revisedRowMapPerGrp, rowImportPerGrp);
regB[0] = VectorFactory::Build(revisedRowMapPerGrp[0], true);
regionMat->apply(*regX[0], *regB[0], Teuchos::NO_TRANS, TST::one(), TST::zero());

// Now create composite B using region B so we can compare
// composite B with the original B
RCP<Vector> compB = VectorFactory::Build(dofMap);
regionalToComposite(regB, compB, rowImportPerGrp);

// Extract the data from B and compB to compare it
ArrayRCP<const SC> dataB = B->getData(0);
ArrayRCP<const SC> dataCompB = compB->getData(0);
for(size_t idx = 0; idx < B->getLocalLength(); ++idx) {
TEST_FLOATING_EQUALITY(TST::magnitude(dataB[idx]),
TST::magnitude(dataCompB[idx]),
100*TMT::eps());
}

} // MatVec

// This test aims at checking that apply regionA to a region vector has the same effect as
// applying A to a composite vector. Of course the region vector needs to be brought back
Expand Down Expand Up @@ -1671,7 +1574,6 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(RegionMatrix, Laplace3D, Scalar, LocalOrdinal,
# define MUELU_ETI_GROUP(Scalar, LO, GO, Node) \
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(RegionMatrix,CompositeToRegionMatrix,Scalar,LO,GO,Node) \
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(RegionMatrix,RegionToCompositeMatrix,Scalar,LO,GO,Node) \
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(RegionMatrix,MatVec,Scalar,LO,GO,Node) \
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(RegionMatrix,FastMatVec,Scalar,LO,GO,Node) \
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(RegionMatrix,FastMatVec3D,Scalar,LO,GO,Node) \
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(RegionMatrix,FastMatVec2D_Elasticity,Scalar,LO,GO,Node) \
Expand Down

0 comments on commit d8461b0

Please sign in to comment.