From d8461b0b9ee046bc5e1cd4ad21fe9a59d7c4a2ee Mon Sep 17 00:00:00 2001 From: Matthias Mayr Date: Wed, 12 Aug 2020 09:45:30 +0200 Subject: [PATCH] MueLu: rm regionMG MatVec w/ double communication We have recently added a "FastMatVec", that precomputes the communication patter for region MG MatVecs to replace the "old" implementation, that performed two rounds of communication during a MatVec. Now, we have enough confidence in the new implementation to get rid of the old one. --- .../regionMG/src/SetupRegionHierarchy_def.hpp | 35 +------ .../regionMG/src/SetupRegionMatrix_def.hpp | 51 ---------- .../regionMG/src/SetupRegionSmoothers_def.hpp | 28 ++---- .../structured/Driver_Structured_Regions.cpp | 17 +--- .../muelu/test/unit_tests/RegionMatrix.cpp | 98 ------------------- 5 files changed, 12 insertions(+), 217 deletions(-) diff --git a/packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp b/packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp index df89c68f7480..01f5be07c642 100644 --- a/packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp +++ b/packages/muelu/research/regionMG/src/SetupRegionHierarchy_def.hpp @@ -1014,19 +1014,13 @@ void vCycle(const int l, ///< ID of current level tm = Teuchos::null; tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 2 - compute residual"))); - const bool useFastMatVec = smootherParams[l]->get("Use fast MatVec"); - Array > regRes(maxRegPerProc); if(useCachedVectors) { regRes = levelList.get > >("residual"); } else { createRegionalVector(regRes, regRowMaps[l]); } - if (useFastMatVec) - computeResidual(regRes, fineRegX, fineRegB, regMatrices[l], *smootherParams[l]); - else - computeResidual(regRes, fineRegX, fineRegB, regMatrices[l], - regRowMaps[l], regRowImporters[l]); + computeResidual(regRes, fineRegX, fineRegB, regMatrices[l], *smootherParams[l]); tm = Teuchos::null; tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 3 - scale interface"))); @@ -1039,7 +1033,7 @@ void vCycle(const int l, ///< ID of current level // Transfer to coarse level Array > coarseRegX(maxRegPerProc); Array > coarseRegB(maxRegPerProc); - if (useFastMatVec) + { // Get pre-communicated communication patterns for the fast MatVec const ArrayRCP regionInterfaceLIDs = smootherParams[l+1]->get>("Fast MatVec: interface LIDs"); @@ -1055,22 +1049,6 @@ void vCycle(const int l, ///< ID of current level // TEUCHOS_ASSERT(regProlong[l+1][j]->getDomainMap()->isSameAs(*coarseRegB[j]->getMap())); } } - else - { - for (int j = 0; j < maxRegPerProc; j++) { - coarseRegX[j] = VectorFactory::Build(regRowMaps[l+1][j], true); - coarseRegB[j] = VectorFactory::Build(regRowMaps[l+1][j], true); - - regProlong[l+1][j]->apply(*regRes[j], *coarseRegB[j], Teuchos::TRANS); - // TEUCHOS_ASSERT(regProlong[l+1][j]->getRangeMap()->isSameAs(*regRes[j]->getMap())); - // TEUCHOS_ASSERT(regProlong[l+1][j]->getDomainMap()->isSameAs(*coarseRegB[j]->getMap())); - } - - tm = Teuchos::null; - tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 5 - sum interface values"))); - - sumInterfaceValues(coarseRegB, regRowMaps[l+1], regRowImporters[l+1]); - } tm = Teuchos::null; bool coarseZeroInitGuess = true; @@ -1085,7 +1063,6 @@ void vCycle(const int l, ///< ID of current level // Transfer coarse level correction to fine level Array > regCorrection(maxRegPerProc); - if (useFastMatVec) { // Get pre-communicated communication patterns for the fast MatVec const ArrayRCP regionInterfaceLIDs = smootherParams[l]->get>("Fast MatVec: interface LIDs"); @@ -1099,14 +1076,6 @@ void vCycle(const int l, ///< ID of current level // TEUCHOS_ASSERT(regProlong[l+1][j]->getRangeMap()->isSameAs(*regCorrection[j]->getMap())); } } - else{ - for (int j = 0; j < maxRegPerProc; j++) { - regCorrection[j] = VectorFactory::Build(regRowMaps[l][j], true); - regProlong[l+1][j]->apply(*coarseRegX[j], *regCorrection[j]); - // TEUCHOS_ASSERT(regProlong[l+1][j]->getDomainMap()->isSameAs(*coarseRegX[j]->getMap())); - // TEUCHOS_ASSERT(regProlong[l+1][j]->getRangeMap()->isSameAs(*regCorrection[j]->getMap())); - } - } tm = Teuchos::null; tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("vCycle: 7 - add coarse grid correction"))); diff --git a/packages/muelu/research/regionMG/src/SetupRegionMatrix_def.hpp b/packages/muelu/research/regionMG/src/SetupRegionMatrix_def.hpp index bfeb4112114e..6833dad7ea27 100644 --- a/packages/muelu/research/regionMG/src/SetupRegionMatrix_def.hpp +++ b/packages/muelu/research/regionMG/src/SetupRegionMatrix_def.hpp @@ -719,57 +719,6 @@ void regionalToComposite(const std::vector -Array > > -computeResidual(Array > >& regRes, ///< residual (to be evaluated) - const Array > > regX, ///< left-hand side (solution) - const Array > > regB, ///< right-hand side (forcing term) - const std::vector > > regionGrpMats, - const std::vector > > revisedRowMapPerGrp, ///< revised row maps in region layout [in] (actually extracted from regionGrpMats) - const std::vector > > rowImportPerGrp ///< row importer in region layout [in] - ) -{ -#include "Xpetra_UseShortNames.hpp" - using Teuchos::TimeMonitor; - const int maxRegPerProc = regX.size(); - - RCP tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("computeResidual: 1 - Rreg = Areg*Xreg"))); - - /* Update the residual vector - * 1. Compute tmp = A * regX in each region - * 2. Sum interface values in tmp due to duplication (We fake this by scaling to reverse the basic splitting) - * 3. Compute r = B - tmp - */ - for (int j = 0; j < maxRegPerProc; j++) { // step 1 - regionGrpMats[j]->apply(*regX[j], *regRes[j]); - // TEUCHOS_ASSERT(regionGrpMats[j]->getDomainMap()->isSameAs(*regX[j]->getMap())); - // TEUCHOS_ASSERT(regionGrpMats[j]->getRangeMap()->isSameAs(*regRes[j]->getMap())); - } - - tm = Teuchos::null; - tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("computeResidual: 2 - sumInterfaceValues"))); - - sumInterfaceValues(regRes, revisedRowMapPerGrp, rowImportPerGrp); - - tm = Teuchos::null; - tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("computeResidual: 3 - Rreg = Breg - Rreg"))); - - for (int j = 0; j < maxRegPerProc; j++) { // step 3 - regRes[j]->update(1.0, *regB[j], -1.0); - // TEUCHOS_ASSERT(regRes[j]->getMap()->isSameAs(*regB[j]->getMap())); - } - - tm = Teuchos::null; - - return regRes; -} // computeResidual /*! \brief Compute local data needed to perform a MatVec in region format diff --git a/packages/muelu/research/regionMG/src/SetupRegionSmoothers_def.hpp b/packages/muelu/research/regionMG/src/SetupRegionSmoothers_def.hpp index cd9d3510c830..4776e950f42a 100644 --- a/packages/muelu/research/regionMG/src/SetupRegionSmoothers_def.hpp +++ b/packages/muelu/research/regionMG/src/SetupRegionSmoothers_def.hpp @@ -153,7 +153,6 @@ void jacobiIterate(RCP smootherParams, const int maxIter = smootherParams->get ("smoother: sweeps"); const double damping = smootherParams->get("smoother: damping"); Array > diag_inv = smootherParams->get > >("smoothers: inverse diagonal"); - const bool useFastMatVec = smootherParams->get("Use fast MatVec"); Array > regRes(maxRegPerProc); createRegionalVector(regRes, revisedRowMapPerGrp); @@ -168,10 +167,7 @@ void jacobiIterate(RCP smootherParams, } } else { - if (useFastMatVec) - computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams); - else - computeResidual(regRes, regX, regB, regionGrpMats, revisedRowMapPerGrp, rowImportPerGrp); + computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams); // update solution according to Jacobi's method for (int j = 0; j < maxRegPerProc; j++) { @@ -212,7 +208,6 @@ void GSIterate(RCP smootherParams, const int maxIter = smootherParams->get("smoother: sweeps"); const double damping = smootherParams->get("smoother: damping"); Teuchos::Array > diag_inv = smootherParams->get > >("smoothers: inverse diagonal"); - const bool useFastMatVec = smootherParams->get("Use fast MatVec"); Array > regRes(maxRegPerProc); createRegionalVector(regRes, revisedRowMapPerGrp); @@ -225,10 +220,7 @@ void GSIterate(RCP smootherParams, // Update the residual vector if (!zeroInitGuess) { - if (useFastMatVec) - computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams); - else - computeResidual(regRes, regX, regB, regionGrpMats, revisedRowMapPerGrp, rowImportPerGrp); + computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams); } // update the solution and the residual @@ -441,7 +433,6 @@ void chebyshevIterate(RCP smootherParams, const Scalar lambdaMax = smootherParams->get("chebyshev: lambda max"); const Scalar boostFactor = smootherParams->get("smoother: Chebyshev boost factor"); Teuchos::Array > diag_inv = smootherParams->get > >("smoothers: inverse diagonal"); - const bool useFastMatVec = smootherParams->get("Use fast MatVec"); // Define some constants for convenience const Scalar SC_ZERO = Teuchos::ScalarTraits::zero(); @@ -479,11 +470,10 @@ void chebyshevIterate(RCP smootherParams, regX[j]->update(SC_ONE, *regP[j], SC_ZERO); // X = 0 + P } } - else { // Compute residual vector - if (useFastMatVec) - computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams); - else - computeResidual(regRes, regX, regB, regionGrpMats, revisedRowMapPerGrp, rowImportPerGrp); + else { + // Compute residual vector + computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams); + for(int j = 0; j < maxRegPerProc; j++) { regZ[j]->elementWiseMultiply(SC_ONE, *diag_inv[j], *regRes[j], SC_ZERO); // z = D_inv * R, that is, D \ R. regP[j]->update(SC_ONE/theta, *regZ[j], SC_ZERO); // P = 1/theta Z @@ -494,10 +484,8 @@ void chebyshevIterate(RCP smootherParams, // The rest of the iterations for (int i = 1; i < maxIter; ++i) { // Compute residual vector - if (useFastMatVec) - computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams); - else - computeResidual(regRes, regX, regB, regionGrpMats, revisedRowMapPerGrp, rowImportPerGrp); + computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams); + // z = D_inv * R, that is, D \ R. for(int j = 0; j < maxRegPerProc; j++) { regZ[j]->elementWiseMultiply(SC_ONE, *diag_inv[j], *regRes[j], SC_ZERO); diff --git a/packages/muelu/research/regionMG/test/structured/Driver_Structured_Regions.cpp b/packages/muelu/research/regionMG/test/structured/Driver_Structured_Regions.cpp index 6b934100c4a4..2a5e9a080f1d 100644 --- a/packages/muelu/research/regionMG/test/structured/Driver_Structured_Regions.cpp +++ b/packages/muelu/research/regionMG/test/structured/Driver_Structured_Regions.cpp @@ -189,7 +189,6 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar int cacheSize = 0; clp.setOption("cachesize", &cacheSize, "cache size (in KB)"); bool useStackedTimer = false; clp.setOption("stacked-timer","no-stacked-timer", &useStackedTimer, "use stacked timer"); bool showTimerSummary = true; clp.setOption("show-timer-summary", "no-show-timer-summary", &showTimerSummary, "Switch on/off the timer summary at the end of the run."); - bool useFastMatVec = true; clp.setOption("fastMV", "no-fastMV", &useFastMatVec, "Use the fast MatVec implementation (or not)"); std::string cycleType = "V"; clp.setOption("cycleType", &cycleType, "{Multigrid cycle type. Possible values: V, W."); clp.recogniseAllOptions(true); @@ -730,7 +729,6 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar // Set data for fast MatVec // for (auto levelSmootherParams : smootherParams) { for(LO levelIdx = 0; levelIdx < numLevels; ++levelIdx) { - smootherParams[levelIdx]->set("Use fast MatVec", useFastMatVec); smootherParams[levelIdx]->set("Fast MatVec: interface LIDs", regionMatVecLIDsPerLevel[levelIdx]); smootherParams[levelIdx]->set("Fast MatVec: interface importer", @@ -858,19 +856,8 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib& lib, int ar //////////////////////////////////////////////////////////////////////// // SWITCH BACK TO NON-LEVEL VARIABLES //////////////////////////////////////////////////////////////////////// - { - if (useFastMatVec) - { - computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams[0]); - } - else - { - computeResidual(regRes, regX, regB, regionGrpMats, - revisedRowMapPerGrp, rowImportPerGrp); - } - - scaleInterfaceDOFs(regRes, regInterfaceScalings[0], true); - } + computeResidual(regRes, regX, regB, regionGrpMats, *smootherParams[0]); + scaleInterfaceDOFs(regRes, regInterfaceScalings[0], true); compRes = VectorFactory::Build(dofMap, true); regionalToComposite(regRes, compRes, rowImportPerGrp); diff --git a/packages/muelu/test/unit_tests/RegionMatrix.cpp b/packages/muelu/test/unit_tests/RegionMatrix.cpp index 98b67e017373..710b22b486ec 100644 --- a/packages/muelu/test/unit_tests/RegionMatrix.cpp +++ b/packages/muelu/test/unit_tests/RegionMatrix.cpp @@ -645,103 +645,6 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(RegionMatrix, RegionToCompositeMatrix, Scalar, } // RegionToCompositeMatrix -// This test aims at checking that apply regionA to a region vector has the same effect as -// applying A to a composite vector. Of course the region vector needs to be brought back -// to composite formate before verifying the equivalence. -TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(RegionMatrix, MatVec, Scalar, LocalOrdinal, GlobalOrdinal, Node) -{ -# include "MueLu_UseShortNames.hpp" - MUELU_TESTING_SET_OSTREAM; - MUELU_TESTING_LIMIT_SCOPE(Scalar,GlobalOrdinal,Node); - - using TST = Teuchos::ScalarTraits; - using magnitude_type = typename TST::magnitudeType; - using TMT = Teuchos::ScalarTraits; - using real_type = typename TST::coordinateType; - using RealValuedMultiVector = Xpetra::MultiVector; - using test_factory = TestHelpers::TestFactory; - - out << "version: " << MueLu::Version() << std::endl; - - // Get MPI parameter - RCP > comm = TestHelpers::Parameters::getDefaultComm(); - - GO nx = 5, ny = 5, nz = 1; - Teuchos::CommandLineProcessor &clp = Teuchos::UnitTestRepository::getCLP(); - Galeri::Xpetra::Parameters galeriParameters(clp, nx, ny, nz, "Laplace2D"); - Teuchos::ParameterList galeriList = galeriParameters.GetParameterList(); - std::string matrixType = galeriParameters.GetMatrixType(); - - // Build maps for the problem - const LO numDofsPerNode = 1; - RCP nodeMap = Galeri::Xpetra::CreateMap(TestHelpers::Parameters::getLib(), - "Cartesian2D", comm, galeriList); - RCP dofMap = Xpetra::MapFactory::Build(nodeMap, numDofsPerNode); - - // Build the Xpetra problem - RCP > Pr = - Galeri::Xpetra::BuildProblem(galeriParameters.GetMatrixType(), dofMap, galeriList); - - // Generate the operator - RCP A = Pr->BuildMatrix(); - A->SetFixedBlockSize(numDofsPerNode); - - // Create auxiliary data for MG - RCP nullspace = Pr->BuildNullspace(); - RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("2D", nodeMap, galeriList); - - // create the region maps, importer and operator from composite counter parts - const int maxRegPerProc = 1; - std::vector > rowMapPerGrp(maxRegPerProc), colMapPerGrp(maxRegPerProc); - std::vector > revisedRowMapPerGrp(maxRegPerProc), revisedColMapPerGrp(maxRegPerProc); - std::vector > rowImportPerGrp(maxRegPerProc), colImportPerGrp(maxRegPerProc); - std::vector > regionGrpMats(maxRegPerProc); - Teuchos::ArrayRCP regionMatVecLIDs; - RCP regionInterfaceImporter; - createRegionMatrix(galeriList, numDofsPerNode, maxRegPerProc, nodeMap, dofMap, A, - rowMapPerGrp, colMapPerGrp, revisedRowMapPerGrp, revisedColMapPerGrp, - rowImportPerGrp, colImportPerGrp, regionGrpMats, - regionMatVecLIDs, regionInterfaceImporter); - - RCP regionMat = regionGrpMats[0]; - - // Create initial vectors in composite format and apply composite A. - // This will give a reference to compare with. - RCP X = VectorFactory::Build(dofMap); - RCP B = VectorFactory::Build(dofMap); - - // we set seed for reproducibility - Utilities::SetRandomSeed(*comm); - X->randomize(); - - // Perform composite MatVec - A->apply(*X, *B, Teuchos::NO_TRANS, TST::one(), TST::zero()); - - // Create the region vectors and apply region A - Array > quasiRegX(maxRegPerProc); - Array > quasiRegB(maxRegPerProc); - Array > regX(maxRegPerProc); - Array > regB(maxRegPerProc); - compositeToRegional(X, quasiRegX, regX, - revisedRowMapPerGrp, rowImportPerGrp); - regB[0] = VectorFactory::Build(revisedRowMapPerGrp[0], true); - regionMat->apply(*regX[0], *regB[0], Teuchos::NO_TRANS, TST::one(), TST::zero()); - - // Now create composite B using region B so we can compare - // composite B with the original B - RCP compB = VectorFactory::Build(dofMap); - regionalToComposite(regB, compB, rowImportPerGrp); - - // Extract the data from B and compB to compare it - ArrayRCP dataB = B->getData(0); - ArrayRCP dataCompB = compB->getData(0); - for(size_t idx = 0; idx < B->getLocalLength(); ++idx) { - TEST_FLOATING_EQUALITY(TST::magnitude(dataB[idx]), - TST::magnitude(dataCompB[idx]), - 100*TMT::eps()); - } - -} // MatVec // This test aims at checking that apply regionA to a region vector has the same effect as // applying A to a composite vector. Of course the region vector needs to be brought back @@ -1671,7 +1574,6 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(RegionMatrix, Laplace3D, Scalar, LocalOrdinal, # define MUELU_ETI_GROUP(Scalar, LO, GO, Node) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(RegionMatrix,CompositeToRegionMatrix,Scalar,LO,GO,Node) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(RegionMatrix,RegionToCompositeMatrix,Scalar,LO,GO,Node) \ - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(RegionMatrix,MatVec,Scalar,LO,GO,Node) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(RegionMatrix,FastMatVec,Scalar,LO,GO,Node) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(RegionMatrix,FastMatVec3D,Scalar,LO,GO,Node) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(RegionMatrix,FastMatVec2D_Elasticity,Scalar,LO,GO,Node) \