Skip to content

Commit

Permalink
MueLu: Fixing Maxwell1 so it robustly works w/ repartitioning (#13079)
Browse files Browse the repository at this point in the history
* MueLu Maxwell1 fixes (with lots of debug)

* MueLu: This actually works.  Stick a fork in the sand

* MueLu: Reerting to remove debug statements

* MueLu: Reverting to remove debug statements

* MueLu: Reverting to remove debug statements

* MueLu: Reverting to remove debug statements

* MueLu: Reverting to remove debug statements

* MueLu:  The small test problem seems to be working.  Stick a fork in it

* MueLu: Removing debug output

* MueLu: Updating maxwell unit tests

* MueLu: Adding accessor to Hierarchy manager

* MueLu: Pushing HM cleanup to Maxwell1

* MueLu: Removing comments

* MueLu: Removing comments and output

* MueLu: Removing comments and output

* MueLu: Sticking a for in something working

* MueLu: Removing extra gunk

* MueLu: Removing debugging info

* MueLu: Initial alloc cleanup

* MueLu: caglusa's suggestion

* MueLu: CLANGgit push origin develop:csiefer-maxwell1-fix!

* MueLu: Fixing off-by-one error
  • Loading branch information
csiefer2 authored Jul 18, 2024
1 parent c634ddf commit 05c5c81
Show file tree
Hide file tree
Showing 15 changed files with 174 additions and 65 deletions.
24 changes: 12 additions & 12 deletions packages/muelu/doc/UsersGuide/masterList.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1053,22 +1053,12 @@
<type>string</type>
<default>"{}"</default>
<description>Keep a subset of the hierarchy data after the
hierarchy is constructed. Use comma-separated array in braces</description>
hierarchy is constructed. This could be for disk I/O, or for
use in other preconditioners. Use comma-separated array in braces</description>
<visible>false</visible>
<comment-ML>not supported by ML</comment-ML>
</parameter>

<parameter>
<name>save data</name>
<type>string</type>
<default>"{}"</default>
<description>Data to dump to disk after the
hierarchy is constructed. Use comma-separated array in braces</description>
<visible>false</visible>
<comment-ML>not supported by ML</comment-ML>
</parameter>


<parameter>
<name>print initial parameters</name>
<type>bool</type>
Expand Down Expand Up @@ -1786,6 +1776,16 @@ Only used when tentative: calculate qr is set to false.</description>
<visible>false</visible>
</parameter>

<parameter>
<name>repartition: save importer</name>
<type>bool</type>
<default>false</default>
<description>Mark Importer as 'Final' to keep it on the hierarchy after setup is complete.</description>
<comment-ML>not supported by ML</comment-ML>
<visible>false</visible>
</parameter>


</rebalancing>

<rap>
Expand Down
8 changes: 4 additions & 4 deletions packages/muelu/doc/UsersGuide/paramlist_hidden.tex
Original file line number Diff line number Diff line change
Expand Up @@ -236,10 +236,8 @@
\textit{colmap\_X\_level.mm}.}

\cbb{keep data}{string}{"{}"}{Keep a subset of the hierarchy data after the
hierarchy is constructed. Use comma-separated array in braces}

\cbb{save data}{string}{"{}"}{Data to dump to disk after the
hierarchy is constructed. Use comma-separated array in braces}
hierarchy is constructed. This could be for disk I/O, or for
use in other preconditioners. Use comma-separated array in braces}

\cbb{print initial parameters}{bool}{true}{Print parameters provided for a hierarchy construction.}

Expand Down Expand Up @@ -401,6 +399,8 @@

\cbb{repartition: use subcommunicators}{bool}{true}{Use subcommunicators on coarser levels.}

\cbb{repartition: save importer}{bool}{false}{Mark Importer as 'Final' to keep it on the hierarchy after setup is complete.}

\cbb{rap: relative diagonal floor}{Array(double)}{{}}{Will boost diagonals on A matrices to the given floor.}

\cbb{rap: fix zero diagonals}{bool}{false}{Set zero diagonals on coarse grids to one.}
Expand Down
20 changes: 14 additions & 6 deletions packages/muelu/src/Interface/MueLu_HierarchyManager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ class HierarchyManager : public HierarchyFactory<Scalar, LocalOrdinal, GlobalOrd
}

H.SetPRrebalance(doPRrebalance_);

H.SetPRViaCopyrebalance(doPRViaCopyrebalance_);
H.SetImplicitTranspose(implicitTranspose_);
H.SetFuseProlongationAndUpdate(fuseProlongationAndUpdate_);
Expand All @@ -194,6 +195,7 @@ class HierarchyManager : public HierarchyFactory<Scalar, LocalOrdinal, GlobalOrd
// 2. Interpreter constructs keep_ array with names and factories for
// that level
// 3. For each level, we call Keep(name, factory) for each keep_

for (int i = 0; i < numDesiredLevel_; i++) {
std::map<int, std::vector<keep_pair>>::const_iterator it = keep_.find(i);
if (it != keep_.end()) {
Expand Down Expand Up @@ -221,9 +223,9 @@ class HierarchyManager : public HierarchyFactory<Scalar, LocalOrdinal, GlobalOrd
ExportDataSetKeepFlags(H, elementToNodeMapsToPrint_, "pcoarsen: element to node map");
#endif

// Data to save only (these do not have a level, so we do all levels)
for (int i = 0; i < dataToSave_.size(); i++)
ExportDataSetKeepFlagsAll(H, dataToSave_[i]);
// Data to keep only (these do not have a level, so we do all levels)
for (int i = 0; i < dataToKeep_.size(); i++)
ExportDataSetKeepFlagsAll(H, dataToKeep_[i]);

int levelID = 0;
int lastLevelID = numDesiredLevel_ - 1;
Expand All @@ -240,6 +242,7 @@ class HierarchyManager : public HierarchyFactory<Scalar, LocalOrdinal, GlobalOrd
isLastLevel = r || (levelID == lastLevelID);
levelID++;
}

if (!matvecParams_.is_null())
H.SetMatvecParams(matvecParams_);
H.AllocateLevelMultiVectors(sizeOfMultiVectors_);
Expand Down Expand Up @@ -277,6 +280,12 @@ class HierarchyManager : public HierarchyFactory<Scalar, LocalOrdinal, GlobalOrd

} // SetupHierarchy

//! Set the number of desired levels.
void SetNumDesiredLevel(int numDesiredLevel) { numDesiredLevel_ = numDesiredLevel; }

//! Get the number of desired levels.
int GetNumDesiredLevel() { return numDesiredLevel_; }

//@}

typedef std::map<std::string, RCP<const FactoryBase>> FactoryMap;
Expand Down Expand Up @@ -311,7 +320,6 @@ class HierarchyManager : public HierarchyFactory<Scalar, LocalOrdinal, GlobalOrd

//! @group Hierarchy parameters
//! @{

mutable int numDesiredLevel_;
Xpetra::global_size_t maxCoarseSize_;
MsgType verbosity_;
Expand Down Expand Up @@ -341,8 +349,8 @@ class HierarchyManager : public HierarchyFactory<Scalar, LocalOrdinal, GlobalOrd
Teuchos::Array<int> aggregatesToPrint_;
Teuchos::Array<int> elementToNodeMapsToPrint_;

// Data we'll need to save, not necessarily print
Teuchos::Array<std::string> dataToSave_;
// Data we'll need to keep, either to dump to disk or to use post-setup
Teuchos::Array<std::string> dataToKeep_;

// Matrices we'll need to print
std::map<std::string, Teuchos::Array<int>> matricesToPrint_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,11 +256,11 @@ void ParameterListInterpreter<Scalar, LocalOrdinal, GlobalOrdinal, Node>::

(void)MUELU_TEST_AND_SET_VAR(paramList, "debug: graph level", int, this->graphOutputLevel_);

// Generic data saving (this saves the data on all levels)
if (paramList.isParameter("save data"))
this->dataToSave_ = Teuchos::getArrayFromStringParameter<std::string>(paramList, "save data");
// Generic data keeping (this keeps the data on all levels)
if (paramList.isParameter("keep data"))
this->dataToKeep_ = Teuchos::getArrayFromStringParameter<std::string>(paramList, "keep data");

// Save level data
// Export level data
if (paramList.isSublist("export data")) {
ParameterList printList = paramList.sublist("export data");

Expand Down Expand Up @@ -1630,6 +1630,7 @@ void ParameterListInterpreter<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: print partition distribution", bool, repartParams);
MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: remap parts", bool, repartParams);
MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: remap num values", int, repartParams);
MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "repartition: save importer", bool, repartParams);
repartFactory->SetParameterList(repartParams);
repartFactory->SetFactory("A", manager.GetFactory("A"));
repartFactory->SetFactory("number of partitions", manager.GetFactory("number of partitions"));
Expand Down Expand Up @@ -2272,7 +2273,7 @@ void ParameterListInterpreter<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
}

if (hieraList.isParameter("number of vectors")) {
this->numDesiredLevel_ = hieraList.get<int>("number of vectors");
this->sizeOfMultiVectors_ = hieraList.get<int>("number of vectors");
hieraList.remove("number of vectors");
}

Expand Down
3 changes: 2 additions & 1 deletion packages/muelu/src/MueCentral/MueLu_Hierarchy_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,9 @@ class Hierarchy : public BaseClass {
template <class S2, class LO2, class GO2, class N2>
friend class Hierarchy;

private:
int LastLevelID() const { return Levels_.size() - 1; }

private:
void DumpCurrentGraph(int level) const;

public:
Expand Down
6 changes: 3 additions & 3 deletions packages/muelu/src/MueCentral/MueLu_MasterList.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,6 @@ namespace MueLu {
"<Parameter name=\"aggregate qualities: mode\" type=\"string\" value=\"eigenvalue\"/>"
"<ParameterList name=\"export data\"/>"
"<Parameter name=\"keep data\" type=\"string\" value=\"{}\"/>"
"<Parameter name=\"save data\" type=\"string\" value=\"{}\"/>"
"<Parameter name=\"print initial parameters\" type=\"bool\" value=\"true\"/>"
"<Parameter name=\"print unused parameters\" type=\"bool\" value=\"true\"/>"
"<Parameter name=\"transpose: use implicit\" type=\"bool\" value=\"false\"/>"
Expand Down Expand Up @@ -317,6 +316,7 @@ namespace MueLu {
"<Parameter name=\"repartition: explicit via new copy rebalance P and R\" type=\"bool\" value=\"false\"/>"
"<Parameter name=\"repartition: rebalance Nullspace\" type=\"bool\" value=\"true\"/>"
"<Parameter name=\"repartition: use subcommunicators\" type=\"bool\" value=\"true\"/>"
"<Parameter name=\"repartition: save importer\" type=\"bool\" value=\"false\"/>"
"<Parameter name=\"rap: relative diagonal floor\" type=\"Array(double)\" value=\"{}\"/>"
"<Parameter name=\"rap: fix zero diagonals\" type=\"bool\" value=\"false\"/>"
"<Parameter name=\"rap: fix zero diagonals threshold\" type=\"double\" value=\"0.\"/>"
Expand Down Expand Up @@ -741,8 +741,6 @@ namespace MueLu {

("keep data","keep data")

("save data","save data")

("ML print initial list","print initial parameters")

("print unused","print unused parameters")
Expand Down Expand Up @@ -899,6 +897,8 @@ namespace MueLu {

("repartition: use subcommunicators","repartition: use subcommunicators")

("repartition: save importer","repartition: save importer")

("rap: relative diagonal floor","rap: relative diagonal floor")

("rap: fix zero diagonals","rap: fix zero diagonals")
Expand Down
1 change: 1 addition & 0 deletions packages/muelu/src/Operators/MueLu_Maxwell1_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "MueLu_Level_fwd.hpp"
#include "MueLu_Hierarchy_fwd.hpp"
#include "MueLu_RAPFactory_fwd.hpp"
#include "MueLu_RebalanceAcFactory_fwd.hpp"
#include "MueLu_PerfUtils_fwd.hpp"
#include "MueLu_SmootherBase_fwd.hpp"

Expand Down
71 changes: 51 additions & 20 deletions packages/muelu/src/Operators/MueLu_Maxwell1_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "MueLu_Level.hpp"
#include "MueLu_Hierarchy.hpp"
#include "MueLu_RAPFactory.hpp"
#include "MueLu_RebalanceAcFactory.hpp"
#include "MueLu_Monitor.hpp"
#include "MueLu_PerfUtils.hpp"
#include "MueLu_ParameterListInterpreter.hpp"
Expand Down Expand Up @@ -111,6 +112,7 @@ void Maxwell1<Scalar, LocalOrdinal, GlobalOrdinal, Node>::setParameters(Teuchos:

list = newList;
}

std::string mode_string = list.get("maxwell1: mode", MasterList::getDefault<std::string>("maxwell1: mode"));
applyBCsTo22_ = list.get("maxwell1: apply BCs to 22", false);
dump_matrices_ = list.get("maxwell1: dump matrices", MasterList::getDefault<bool>("maxwell1: dump matrices"));
Expand Down Expand Up @@ -273,16 +275,20 @@ void Maxwell1<Scalar, LocalOrdinal, GlobalOrdinal, Node>::compute(bool reuse) {
if (!Coords_.is_null())
precList22_.sublist("user data").set("Coordinates", Coords_);

/* Repartitioning *must* be in sync between hierarchies, but the
only thing we need to watch here is the subcomms, since ReitzingerPFactory
won't look at all the other stuff */
/* Repartitioning *must* be in sync between hierarchies, which means
that we need to keep the importers in sync too */
if (precList22_.isParameter("repartition: enable")) {
bool repartition = precList22_.get<bool>("repartition: enable");
precList11_.set("repartition: enable", repartition);

// If we're repartitioning (2,2), we need to rebalance for (1,1) to do the right thing
if (repartition)
// If we're repartitioning (2,2), we need to rebalance for (1,1) to do the right thing,
// as well as keep the importers
if (repartition) {
// FIXME: We should probably update rather than clobber
precList22_.set("repartition: save importer", true);
// precList22_.set("repartition: rebalance P and R", false);
precList22_.set("repartition: rebalance P and R", true);
}

if (precList22_.isParameter("repartition: use subcommunicators")) {
precList11_.set("repartition: use subcommunicators", precList22_.get<bool>("repartition: use subcommunicators"));
Expand Down Expand Up @@ -407,8 +413,8 @@ void Maxwell1<Scalar, LocalOrdinal, GlobalOrdinal, Node>::compute(bool reuse) {
Hierarchy11_->AddNewLevel();
RCP<Level> NodeL = Hierarchy22_->GetLevel(i);
RCP<Level> EdgeL = Hierarchy11_->GetLevel(i);
RCP<Operator> NodeOp = NodeL->Get<RCP<Operator> >("A");
RCP<Matrix> NodeAggMatrix = rcp_dynamic_cast<Matrix>(NodeOp);
RCP<Operator> NodeAggOp = NodeL->Get<RCP<Operator> >("A");
RCP<Matrix> NodeAggMatrix = rcp_dynamic_cast<Matrix>(NodeAggOp);
std::string labelstr = FormattingHelper::getColonLabel(EdgeL->getObjectLabel());

if (i == 0) {
Expand All @@ -429,9 +435,16 @@ void Maxwell1<Scalar, LocalOrdinal, GlobalOrdinal, Node>::compute(bool reuse) {
TEUCHOS_TEST_FOR_EXCEPTION(NodalP_ones.is_null(), Exceptions::RuntimeError, "Applying ones to prolongator failed");
EdgeL->Set("Pnodal", NodalP_ones);

// Get the importer if we have one (for repartitioning)
RCP<const Import> importer;
if (NodeL->IsAvailable("Importer")) {
importer = NodeL->Get<RCP<const Import> >("Importer");
EdgeL->Set("NodeImporter", importer);
}

// If we repartition a processor away, a RCP<Operator> is stuck
// on the level instead of an RCP<Matrix>
if (!NodeAggMatrix.is_null()) {
if (!OldSmootherMatrix.is_null() && !NodalP_ones.is_null()) {
EdgeL->Set("NodeAggMatrix", NodeAggMatrix);
if (!have_generated_Kn) {
// The user gave us a Kn on the fine level, so we're using a seperate aggregation
Expand All @@ -446,12 +459,33 @@ void Maxwell1<Scalar, LocalOrdinal, GlobalOrdinal, Node>::compute(bool reuse) {
Teuchos::ParameterList RAPlist;
RAPlist.set("rap: fix zero diagonals", false);
RCP<Matrix> NewKn = Maxwell_Utils<SC, LO, GO, NO>::PtAPWrapper(OldSmootherMatrix, NodalP_ones, RAPlist, labelstr);
EdgeL->Set("NodeMatrix", NewKn);

// If there's an importer, we need to Rebalance the NewKn
if (!importer.is_null()) {
ParameterList rebAcParams;
rebAcParams.set("repartition: use subcommunicators", true);
rebAcParams.set("repartition: use subcommunicators in place", true);
auto newAfact = rcp(new RebalanceAcFactory());
newAfact->SetParameterList(rebAcParams);
RCP<const Map> InPlaceMap = NodeAggMatrix.is_null() ? Teuchos::null : NodeAggMatrix->getRowMap();

Level fLevel, cLevel;
cLevel.SetPreviousLevel(Teuchos::rcpFromRef(fLevel));
cLevel.Set("InPlaceMap", InPlaceMap);
cLevel.Set("A", NewKn);
cLevel.Request("A", newAfact.get());
newAfact->Build(fLevel, cLevel);

NewKn = cLevel.Get<RCP<Matrix> >("A", newAfact.get());
EdgeL->Set("NodeMatrix", NewKn);
} else {
EdgeL->Set("NodeMatrix", NewKn);
}

// Fix the old one
double thresh = parameterList_.get("maxwell1: nodal smoother fix zero diagonal threshold", 1e-10);
if (thresh > 0.0) {
printf("CMS: Reparing diagonal after next level generation\n");
GetOStream(Runtime0) << "Maxwell1::compute(): Fixing zero diagonal for nodal smoother matrix" << std::endl;
Scalar replacement = Teuchos::ScalarTraits<Scalar>::one();
Xpetra::MatrixUtils<SC, LO, GO, NO>::CheckRepairMainDiagonal(OldSmootherMatrix, true, GetOStream(Warnings1), thresh, replacement);
}
Expand All @@ -464,19 +498,13 @@ void Maxwell1<Scalar, LocalOrdinal, GlobalOrdinal, Node>::compute(bool reuse) {
}
} else {
// We've partitioned things away.
EdgeL->Set("NodeMatrix", NodeOp);
EdgeL->Set("NodeAggMatrix", NodeOp);
EdgeL->Set("NodeMatrix", NodeAggOp);
EdgeL->Set("NodeAggMatrix", NodeAggOp);
}

OldEdgeLevel = EdgeL;
}

// Get the importer if we have one (for repartitioning)
// This will get used in ReitzingerPFactory
if (NodeL->IsAvailable("Importer")) {
auto importer = NodeL->Get<RCP<const Import> >("Importer");
EdgeL->Set("NodeImporter", importer);
}
} // end Hierarchy22 loop

////////////////////////////////////////////////////////////////////////////////
Expand All @@ -500,8 +528,11 @@ void Maxwell1<Scalar, LocalOrdinal, GlobalOrdinal, Node>::compute(bool reuse) {
RCP<HierarchyManager<SC, LO, GO, NO> > mueLuFactory = rcp(new ParameterListInterpreter<SC, LO, GO, NO>(processedPrecList11, SM_Matrix_->getDomainMap()->getComm()));
Hierarchy11_->setlib(SM_Matrix_->getDomainMap()->lib());
Hierarchy11_->SetProcRankVerbose(SM_Matrix_->getDomainMap()->getComm()->getRank());
// Stick the non-serializible data on the hierarchy.
HierarchyUtils<SC, LO, GO, NO>::AddNonSerializableDataToHierarchy(*mueLuFactory, *Hierarchy11_, nonSerialList11);

// Stick the non-serializible data on the hierarchy and do setup
if (nonSerialList11.numParams() > 0) {
HierarchyUtils<SC, LO, GO, NO>::AddNonSerializableDataToHierarchy(*mueLuFactory, *Hierarchy11_, nonSerialList11);
}
mueLuFactory->SetupHierarchy(*Hierarchy11_);

if (refmaxwellCoarseSolve) {
Expand Down
2 changes: 1 addition & 1 deletion packages/muelu/src/Operators/MueLu_Maxwell_Utils_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ class Maxwell_Utils : public VerboseObject {

//! Performs an P^T AP
static RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
PtAPWrapper(const RCP<Matrix>& A, const RCP<Matrix>& P, Teuchos::ParameterList& params, std::string& label);
PtAPWrapper(const RCP<Matrix>& A, const RCP<Matrix>& P, Teuchos::ParameterList& params, const std::string& label);
};

} // namespace MueLu
Expand Down
2 changes: 1 addition & 1 deletion packages/muelu/src/Operators/MueLu_Maxwell_Utils_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ void Maxwell_Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
Maxwell_Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
PtAPWrapper(const RCP<Matrix>& A, const RCP<Matrix>& P, ParameterList& params, std::string& label) {
PtAPWrapper(const RCP<Matrix>& A, const RCP<Matrix>& P, ParameterList& params, const std::string& label) {
Level fineLevel, coarseLevel;
fineLevel.SetFactoryManager(null);
coarseLevel.SetFactoryManager(null);
Expand Down
Loading

0 comments on commit 05c5c81

Please sign in to comment.