From b3b5f88e0e8434970a577e77b20dcea7ff4469dc Mon Sep 17 00:00:00 2001 From: Matthias Mayr Date: Wed, 12 Aug 2020 09:45:30 +0200 Subject: [PATCH] MueLu: rm regionMG MatVec w/ double communication We have recently added a "FastMatVec", that precomputes the communication patter for region MG MatVecs to replace the "old" implementation, that performed two rounds of communication during a MatVec. Now, we have enough confidence in the new implementation to get rid of the old one. --- .../regionMG/src/SetupRegionHierarchy_def.hpp | 35 +------------ .../regionMG/src/SetupRegionMatrix_def.hpp | 51 ------------------- .../regionMG/src/SetupRegionSmoothers_def.hpp | 28 +++------- .../structured/Driver_Structured_Regions.cpp | 17 +------ 4 files changed, 12 insertions(+), 119 deletions(-) diff --git a/packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp b/packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp index df89c68f7480..01f5be07c642 100644 --- a/packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp +++ b/packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp @@ -1014,19 +1014,13 @@ void vCycle(const int l, ///< ID of current level tm = Teuchos::null; tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 2 - compute residual"))); - const bool useFastMatVec = smootherParams[l]->get("Use fast MatVec"); - Array > regRes(maxRegPerProc); if(useCachedVectors) { regRes = levelList.get > >("residual"); } else { createRegionalVector(regRes, regRowMaps[l]); } - if (useFastMatVec) - computeResidual(regRes, fineRegX, fineRegB, regMatrices[l], *smootherParams[l]); - else - computeResidual(regRes, fineRegX, fineRegB, regMatrices[l], - regRowMaps[l], regRowImporters[l]); + computeResidual(regRes, fineRegX, fineRegB, regMatrices[l], *smootherParams[l]); tm = Teuchos::null; tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 3 - scale interface"))); @@ -1039,7 +1033,7 @@ void vCycle(const int l, ///< ID of current level // Transfer to coarse level Array > coarseRegX(maxRegPerProc); Array > coarseRegB(maxRegPerProc); - if (useFastMatVec) + { // Get pre-communicated communication patterns for the fast MatVec const ArrayRCP regionInterfaceLIDs = smootherParams[l+1]->get>("Fast MatVec: interface LIDs"); @@ -1055,22 +1049,6 @@ void vCycle(const int l, ///< ID of current level // TEUCHOS_ASSERT(regProlong[l+1][j]->getDomainMap()->isSameAs(*coarseRegB[j]->getMap())); } } - else - { - for (int j = 0; j < maxRegPerProc; j++) { - coarseRegX[j] = VectorFactory::Build(regRowMaps[l+1][j], true); - coarseRegB[j] = VectorFactory::Build(regRowMaps[l+1][j], true); - - regProlong[l+1][j]->apply(*regRes[j], *coarseRegB[j], Teuchos::TRANS); - // TEUCHOS_ASSERT(regProlong[l+1][j]->getRangeMap()->isSameAs(*regRes[j]->getMap())); - // TEUCHOS_ASSERT(regProlong[l+1][j]->getDomainMap()->isSameAs(*coarseRegB[j]->getMap())); - } - - tm = Teuchos::null; - tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 5 - sum interface values"))); - - sumInterfaceValues(coarseRegB, regRowMaps[l+1], regRowImporters[l+1]); - } tm = Teuchos::null; bool coarseZeroInitGuess = true; @@ -1085,7 +1063,6 @@ void vCycle(const int l, ///< ID of current level // Transfer coarse level correction to fine level Array > regCorrection(maxRegPerProc); - if (useFastMatVec) { // Get pre-communicated communication patterns for the fast MatVec const ArrayRCP regionInterfaceLIDs = smootherParams[l]->get>("Fast MatVec: interface LIDs"); @@ -1099,14 +1076,6 @@ void vCycle(const int l, ///< ID of current level // TEUCHOS_ASSERT(regProlong[l+1][j]->getRangeMap()->isSameAs(*regCorrection[j]->getMap())); } } - else{ - for (int j = 0; j < maxRegPerProc; j++) { - regCorrection[j] = VectorFactory::Build(regRowMaps[l][j], true); - regProlong[l+1][j]->apply(*coarseRegX[j], *regCorrection[j]); - // TEUCHOS_ASSERT(regProlong[l+1][j]->getDomainMap()->isSameAs(*coarseRegX[j]->getMap())); - // TEUCHOS_ASSERT(regProlong[l+1][j]->getRangeMap()->isSameAs(*regCorrection[j]->getMap())); - } - } tm = Teuchos::null; tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 7 - add coarse grid correction"))); diff --git a/packages/muelu/research/regionMG/src/SetupRegionMatrix_def.hpp b/packages/muelu/research/regionMG/src/SetupRegionMatrix_def.hpp index bfeb4112114e..6833dad7ea27 100644 --- a/packages/muelu/research/regionMG/src/SetupRegionMatrix_def.hpp +++ b/packages/muelu/research/regionMG/src/SetupRegionMatrix_def.hpp @@ -719,57 +719,6 @@ void regionalToComposite(const std::vector -Array > > -computeResidual(Array > >& regRes, ///< residual (to be evaluated) - const Array > > regX, ///< left-hand side (solution) - const Array > > regB, ///< right-hand side (forcing term) - const std::vector > > regionGrpMats, - const std::vector > > revisedRowMapPerGrp, ///< revised row maps in region layout [in] (actually extracted from regionGrpMats) - const std::vector > > rowImportPerGrp ///< row importer in region layout [in] - ) -{ -#include "Xpetra_UseShortNames.hpp" - using Teuchos::TimeMonitor; - const int maxRegPerProc = regX.size(); - - RCP tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("computeResidual: 1 - Rreg = Areg*Xreg"))); - - /* Update the residual vector - * 1. Compute tmp = A * regX in each region - * 2. Sum interface values in tmp due to duplication (We fake this by scaling to reverse the basic splitting) - * 3. Compute r = B - tmp - */ - for (int j = 0; j < maxRegPerProc; j++) { // step 1 - regionGrpMats[j]->apply(*regX[j], *regRes[j]); - // TEUCHOS_ASSERT(regionGrpMats[j]->getDomainMap()->isSameAs(*regX[j]->getMap())); - // TEUCHOS_ASSERT(regionGrpMats[j]->getRangeMap()->isSameAs(*regRes[j]->getMap())); - } - - tm = Teuchos::null; - tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("computeResidual: 2 - sumInterfaceValues"))); - - sumInterfaceValues(regRes, revisedRowMapPerGrp, rowImportPerGrp); - - tm = Teuchos::null; - tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("computeResidual: 3 - Rreg = Breg - Rreg"))); - - for (int j = 0; j < maxRegPerProc; j++) { // step 3 - regRes[j]->update(1.0, *regB[j], -1.0); - // TEUCHOS_ASSERT(regRes[j]->getMap()->isSameAs(*regB[j]->getMap())); - } - - tm = Teuchos::null; - - return regRes; -} // computeResidual /*! \brief Compute local data needed to perform a MatVec in region format diff --git a/packages/muelu/research/regionMG/src/SetupRegionSmoothers_def.hpp b/packages/muelu/research/regionMG/src/SetupRegionSmoothers_def.hpp index cd9d3510c830..4776e950f42a 100644 --- a/packages/muelu/research/regionMG/src/SetupRegionSmoothers_def.hpp +++ b/packages/muelu/research/regionMG/src/SetupRegionSmoothers_def.hpp @@ -153,7 +153,6 @@ void jacobiIterate(RCP smootherParams, const int maxIter = smootherParams->get ("smoother: sweeps"); const double damping = smootherParams->get("smoother: damping"); Array > diag_inv = smootherParams->get > >("smoothers: inverse diagonal"); - const bool useFastMatVec = smootherParams->get("Use fast MatVec"); Array > regRes(maxRegPerProc); createRegionalVector(regRes, revisedRowMapPerGrp); @@ -168,10 +167,7 @@ void jacobiIterate(RCP smootherParams, } } else { - if (useFastMatVec) - computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams); - else - computeResidual(regRes, regX, regB, regionGrpMats, revisedRowMapPerGrp, rowImportPerGrp); + computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams); // update solution according to Jacobi's method for (int j = 0; j < maxRegPerProc; j++) { @@ -212,7 +208,6 @@ void GSIterate(RCP smootherParams, const int maxIter = smootherParams->get("smoother: sweeps"); const double damping = smootherParams->get("smoother: damping"); Teuchos::Array > diag_inv = smootherParams->get > >("smoothers: inverse diagonal"); - const bool useFastMatVec = smootherParams->get("Use fast MatVec"); Array > regRes(maxRegPerProc); createRegionalVector(regRes, revisedRowMapPerGrp); @@ -225,10 +220,7 @@ void GSIterate(RCP smootherParams, // Update the residual vector if (!zeroInitGuess) { - if (useFastMatVec) - computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams); - else - computeResidual(regRes, regX, regB, regionGrpMats, revisedRowMapPerGrp, rowImportPerGrp); + computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams); } // update the solution and the residual @@ -441,7 +433,6 @@ void chebyshevIterate(RCP smootherParams, const Scalar lambdaMax = smootherParams->get("chebyshev: lambda max"); const Scalar boostFactor = smootherParams->get("smoother: Chebyshev boost factor"); Teuchos::Array > diag_inv = smootherParams->get > >("smoothers: inverse diagonal"); - const bool useFastMatVec = smootherParams->get("Use fast MatVec"); // Define some constants for convenience const Scalar SC_ZERO = Teuchos::ScalarTraits::zero(); @@ -479,11 +470,10 @@ void chebyshevIterate(RCP smootherParams, regX[j]->update(SC_ONE, *regP[j], SC_ZERO); // X = 0 + P } } - else { // Compute residual vector - if (useFastMatVec) - computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams); - else - computeResidual(regRes, regX, regB, regionGrpMats, revisedRowMapPerGrp, rowImportPerGrp); + else { + // Compute residual vector + computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams); + for(int j = 0; j < maxRegPerProc; j++) { regZ[j]->elementWiseMultiply(SC_ONE, *diag_inv[j], *regRes[j], SC_ZERO); // z = D_inv * R, that is, D \ R. regP[j]->update(SC_ONE/theta, *regZ[j], SC_ZERO); // P = 1/theta Z @@ -494,10 +484,8 @@ void chebyshevIterate(RCP smootherParams, // The rest of the iterations for (int i = 1; i < maxIter; ++i) { // Compute residual vector - if (useFastMatVec) - computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams); - else - computeResidual(regRes, regX, regB, regionGrpMats, revisedRowMapPerGrp, rowImportPerGrp); + computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams); + // z = D_inv * R, that is, D \ R. for(int j = 0; j < maxRegPerProc; j++) { regZ[j]->elementWiseMultiply(SC_ONE, *diag_inv[j], *regRes[j], SC_ZERO); diff --git a/packages/muelu/research/regionMG/test/structured/Driver_Structured_Regions.cpp b/packages/muelu/research/regionMG/test/structured/Driver_Structured_Regions.cpp index 6b934100c4a4..2a5e9a080f1d 100644 --- a/packages/muelu/research/regionMG/test/structured/Driver_Structured_Regions.cpp +++ b/packages/muelu/research/regionMG/test/structured/Driver_Structured_Regions.cpp @@ -189,7 +189,6 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar int cacheSize = 0; clp.setOption("cachesize", &cacheSize, "cache size (in KB)"); bool useStackedTimer = false; clp.setOption("stacked-timer","no-stacked-timer", &useStackedTimer, "use stacked timer"); bool showTimerSummary = true; clp.setOption("show-timer-summary", "no-show-timer-summary", &showTimerSummary, "Switch on/off the timer summary at the end of the run."); - bool useFastMatVec = true; clp.setOption("fastMV", "no-fastMV", &useFastMatVec, "Use the fast MatVec implementation (or not)"); std::string cycleType = "V"; clp.setOption("cycleType", &cycleType, "{Multigrid cycle type. Possible values: V, W."); clp.recogniseAllOptions(true); @@ -730,7 +729,6 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar // Set data for fast MatVec // for (auto levelSmootherParams : smootherParams) { for(LO levelIdx = 0; levelIdx < numLevels; ++levelIdx) { - smootherParams[levelIdx]->set("Use fast MatVec", useFastMatVec); smootherParams[levelIdx]->set("Fast MatVec: interface LIDs", regionMatVecLIDsPerLevel[levelIdx]); smootherParams[levelIdx]->set("Fast MatVec: interface importer", @@ -858,19 +856,8 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar //////////////////////////////////////////////////////////////////////// // SWITCH BACK TO NON-LEVEL VARIABLES //////////////////////////////////////////////////////////////////////// - { - if (useFastMatVec) - { - computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams[0]); - } - else - { - computeResidual(regRes, regX, regB, regionGrpMats, - revisedRowMapPerGrp, rowImportPerGrp); - } - - scaleInterfaceDOFs(regRes, regInterfaceScalings[0], true); - } + computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams[0]); + scaleInterfaceDOFs(regRes, regInterfaceScalings[0], true); compRes = VectorFactory::Build(dofMap, true); regionalToComposite(regRes, compRes, rowImportPerGrp);