Skip to content

Commit

Permalink
Merge pull request #13822 from cgcgcg/barriers
Browse files Browse the repository at this point in the history
MueLu & MiniEM: More barriers
  • Loading branch information
cgcgcg authored Feb 19, 2025
2 parents 619abb4 + a9a8557 commit 1669e5d
Show file tree
Hide file tree
Showing 9 changed files with 222 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1192,7 +1192,7 @@ void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level
// Deal with the situation where boundary conditions have only been enforced on rows, but not on columns.
// We enforce dropping of these entries by assigning a very large number to the diagonal entries corresponding to BCs.
if (!haveAddedToDiag)
localLaplDiagData[row] = STS::rmax();
localLaplDiagData[row] = STS::squareroot(STS::rmax());
}
} // subtimer
{
Expand Down
17 changes: 13 additions & 4 deletions packages/muelu/src/Operators/MueLu_RefMaxwell_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1384,10 +1384,19 @@ Teuchos::RCP<Teuchos::TimeMonitor> RefMaxwell<Scalar, LocalOrdinal, GlobalOrdina
if (!syncTimers_)
return Teuchos::rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer("MueLu " + solverName_ + ": " + name)));
else {
if (comm.is_null())
return Teuchos::rcp(new Teuchos::SyncTimeMonitor(*Teuchos::TimeMonitor::getNewTimer("MueLu " + solverName_ + ": " + name), SM_Matrix_->getRowMap()->getComm().ptr()));
else
return Teuchos::rcp(new Teuchos::SyncTimeMonitor(*Teuchos::TimeMonitor::getNewTimer("MueLu " + solverName_ + ": " + name), comm.ptr()));
if (comm.is_null()) {
{
Teuchos::rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer("MueLu " + solverName_ + ": " + name + "_barrier")));
SM_Matrix_->getRowMap()->getComm()->barrier();
}
return Teuchos::rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer("MueLu " + solverName_ + ": " + name)));
} else {
{
Teuchos::rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer("MueLu " + solverName_ + ": " + name + "_barrier")));
comm->barrier();
}
return Teuchos::rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer("MueLu " + solverName_ + ": " + name)));
}
}
} else
return Teuchos::null;
Expand Down
15 changes: 8 additions & 7 deletions packages/muelu/test/scaling/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ int main_(Teuchos::CommandLineProcessor& clp, Xpetra::UnderlyingLib& lib, int ar
int writeMatricesOPT = -2;
clp.setOption("write", &writeMatricesOPT, "write matrices to file (-1 means all; i>=0 means level i)");
std::string dsolveType = "belos", solveType;
clp.setOption("solver", &dsolveType, "solve type: (none | belos | standalone | matvec)");
clp.setOption("solver", &dsolveType, "solve type: (none | belos | standalone | matvec | simple_cg )");
std::string belosType = "cg";
clp.setOption("belosType", &belosType, "belos solver type: (Pseudoblock CG | Block CG | Pseudoblock GMRES | Block GMRES | ...) see BelosSolverFactory.hpp for exhaustive list of solvers");
bool computeCondEst = false;
Expand Down Expand Up @@ -519,16 +519,17 @@ int main_(Teuchos::CommandLineProcessor& clp, Xpetra::UnderlyingLib& lib, int ar
userParamList.set("Node Comm", nodeComm);
}
#endif
out2 << "*********** MueLu ParameterList ***********" << std::endl;
out2 << mueluList;
out2 << "*******************************************" << std::endl;

RCP<Hierarchy> H;
RCP<Operator> Prec;
// Build the preconditioner numRebuilds+1 times
MUELU_SWITCH_TIME_MONITOR(tm, "Driver: 2 - MueLu Setup");
PreconditionerSetup(A, coordinates, nullspace, material, mueluList, profileSetup, useAMGX, useML, setNullSpace, numRebuilds, H, Prec);
if (solvePreconditioned) {
out2 << "*********** MueLu ParameterList ***********" << std::endl;
out2 << mueluList;
out2 << "*******************************************" << std::endl;

MUELU_SWITCH_TIME_MONITOR(tm, "Driver: 2 - MueLu Setup");
PreconditionerSetup(A, coordinates, nullspace, material, mueluList, profileSetup, useAMGX, useML, setNullSpace, numRebuilds, H, Prec);
}
comm->barrier();
tm = Teuchos::null;

Expand Down
149 changes: 149 additions & 0 deletions packages/muelu/test/scaling/DriverCore.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,153 @@ struct Matvec_Wrapper<double, int, GlobalOrdinal, Tpetra::KokkosCompat::KokkosSe
};
#endif

template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
bool cg_solve(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>> A,
Teuchos::RCP<const Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> b,
Teuchos::RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>> x,
const double tolerance, const int max_iter, const bool barriers) {
using Teuchos::TimeMonitor;
using Vector = Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
using STS = typename Teuchos::ScalarTraits<Scalar>;
using magnitude_type = typename STS::magnitudeType;

std::string cgTimerName = "CG: total";
std::string cgBarrierTimerName = "CG: total barrier";
std::string addTimerName = "CG: axpby";
std::string matvecTimerName = "CG: spmv";
std::string dotTimerName = "CG: dot";
std::string matvecBarrierTimerName = "CG: barrier spmv";
std::string dotBarrierTimerName = "CG: barrier dot";

auto comm = A->getRowMap()->getComm();
int myproc = comm->getRank();

typedef typename Vector::scalar_type ScalarType;
typedef typename Vector::local_ordinal_type LO;
Teuchos::RCP<Vector> r, p, Ap;
// int max_iter=200;
// double tolerance = 1e-8;
r = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(A->getRangeMap());
p = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(A->getRangeMap());
Ap = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(A->getRangeMap());

magnitude_type normr = 0;
magnitude_type rtrans = 0;
magnitude_type oldrtrans = 0;

LO print_freq = max_iter / 10;
print_freq = std::min(print_freq, 50);
print_freq = std::max(print_freq, 1);

if (barriers) {
TimeMonitor t(*TimeMonitor::getNewTimer(cgBarrierTimerName));
comm->barrier();
}
TimeMonitor timer(*TimeMonitor::getNewTimer(cgTimerName));

{
TimeMonitor t(*TimeMonitor::getNewTimer(addTimerName));
p->update(1.0, *x, 0.0, *x, 0.0);
}
{
if (barriers) {
TimeMonitor t(*TimeMonitor::getNewTimer(matvecBarrierTimerName));
comm->barrier();
}
TimeMonitor t(*TimeMonitor::getNewTimer(matvecTimerName));
A->apply(*p, *Ap);
}
{
TimeMonitor t(*TimeMonitor::getNewTimer(addTimerName));
r->update(1.0, *b, -1.0, *Ap, 0.0);
}
{
if (barriers) {
TimeMonitor t(*TimeMonitor::getNewTimer(dotBarrierTimerName));
comm->barrier();
}
TimeMonitor t(*TimeMonitor::getNewTimer(dotTimerName));
rtrans = r->dot(*r);
}

normr = std::sqrt(rtrans);

if (myproc == 0) {
std::cout << "Initial Residual = " << normr << std::endl;
}

magnitude_type brkdown_tol = std::numeric_limits<magnitude_type>::epsilon();
LO k;
for (k = 1; k <= max_iter && normr > tolerance; ++k) {
if (k == 1) {
TimeMonitor t(*TimeMonitor::getNewTimer(addTimerName));
p->update(1.0, *r, 0.0);
} else {
oldrtrans = rtrans;
{
if (barriers) {
TimeMonitor t(*TimeMonitor::getNewTimer(dotBarrierTimerName));
comm->barrier();
}
TimeMonitor t(*TimeMonitor::getNewTimer(dotTimerName));
rtrans = r->dot(*r);
}
{
TimeMonitor t(*TimeMonitor::getNewTimer(addTimerName));
magnitude_type beta = rtrans / oldrtrans;
p->update(1.0, *r, beta);
}
}
normr = std::sqrt(rtrans);
if (myproc == 0 && (k % print_freq == 0 || k == max_iter)) {
std::cout << "Iteration = " << k << " Residual = " << normr << std::endl;
}

magnitude_type alpha = 0;
magnitude_type p_ap_dot = 0;
{
if (barriers) {
TimeMonitor t(*TimeMonitor::getNewTimer(matvecBarrierTimerName));
comm->barrier();
}
TimeMonitor t(*TimeMonitor::getNewTimer(matvecTimerName));
A->apply(*p, *Ap);
}
{
if (barriers) {
TimeMonitor t(*TimeMonitor::getNewTimer(dotBarrierTimerName));
comm->barrier();
}
TimeMonitor t(*TimeMonitor::getNewTimer(dotTimerName));
p_ap_dot = Ap->dot(*p);
}
{
TimeMonitor t(*TimeMonitor::getNewTimer(addTimerName));
if (p_ap_dot < brkdown_tol) {
if (p_ap_dot < 0) {
std::cerr << "miniFE::cg_solve ERROR, numerical breakdown!" << std::endl;
return false;
} else
brkdown_tol = 0.1 * p_ap_dot;
}
alpha = rtrans / p_ap_dot;
x->update(alpha, *p, 1.0);
r->update(-alpha, *Ap, 1.0);
}
}
{
if (barriers) {
TimeMonitor t(*TimeMonitor::getNewTimer(dotBarrierTimerName));
comm->barrier();
}
TimeMonitor t(*TimeMonitor::getNewTimer(dotTimerName));
rtrans = r->dot(*r);
}

normr = std::sqrt(rtrans);
return true;
}

//*************************************************************************************
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
void SystemSolve(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& A,
Expand Down Expand Up @@ -461,6 +608,8 @@ void SystemSolve(Teuchos::RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal
if (profileSolve) cudaProfilerStop();
#endif
#endif // ifdef HAVE_MUELU_BELOS
} else if (solveType == "simple_cg") {
cg_solve(A, B->getVector(0), X->getVectorNonConst(0), tol, maxIts, true);
} else {
throw MueLu::Exceptions::RuntimeError("Unknown solver type: \"" + solveType + "\"");
}
Expand Down
13 changes: 12 additions & 1 deletion packages/panzer/mini-em/example/BlockPrec/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ TRIBITS_COPY_FILES_TO_BINARY_DIR(CopyBlockPrecFiles
darcyTet.xml darcyHex.xml
darcyTetAnalytic.xml darcyHexAnalytic.xml
solverCG.xml solverGMRES.xml
solverMueLu.xml solverMueLu2D.xml solverMueLuEpetra.xml
solverMueLu.xml solverMueLu2D.xml solverMueLuEpetra.xml solverMueLuBarrier.xml
solverMueLuOpenMP.xml solverMueLuCuda.xml solverMueLuTruncated.xml solverMueLuTPL.xml
solverMueLuHO.xml solverMueLuHOCuda.xml
solverAugmentationUseILU.xml
Expand Down Expand Up @@ -90,6 +90,7 @@ TRIBITS_ADD_TEST(
POSTFIX_AND_ARGS_8 "2D" --solver=MueLu --numTimeSteps=1 --linAlgebra=Tpetra --inputFile=maxwell2D.xml
POSTFIX_AND_ARGS_9 "order1_analytic" --solver=MueLu --linAlgebra=Tpetra --inputFile=maxwell-analyticSolution.xml
POSTFIX_AND_ARGS_10 "order1_truncated" --solver=MueLu --numTimeSteps=1 --linAlgebra=Tpetra --truncateMueLuHierarchy
POSTFIX_AND_ARGS_11 "order1_barriers" --solver=MueLu --numTimeSteps=1 --linAlgebra=Tpetra --barriers
NUM_MPI_PROCS 4
)

Expand Down Expand Up @@ -126,6 +127,16 @@ TRIBITS_ADD_TEST(
RUN_SERIAL
)

TRIBITS_ADD_TEST(
BlockPrec
NAME "MiniEM-BlockPrec_RefMaxwell_Performance_16_barriers"
ARGS "--stacked-timer --solver=MueLu --numTimeSteps=3 --linAlgebra=Tpetra --inputFile=maxwell-large.xml --barriers"
COMM mpi
NUM_MPI_PROCS 16
CATEGORIES PERFORMANCE
RUN_SERIAL
)

IF (${PACKAGE_NAME}_ENABLE_ShyLU_NodeTacho)

TRIBITS_ADD_TEST(
Expand Down
3 changes: 3 additions & 0 deletions packages/panzer/mini-em/example/BlockPrec/MiniEM_helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ namespace mini_em {
Teuchos::RCP<Teuchos::FancyOStream> &out,
std::string &xml, int basis_order,
const bool preferTPLs,
const bool useBarriers,
const bool truncateMueLuHierarchy) {
using Teuchos::RCP;
using Teuchos::rcp;
Expand Down Expand Up @@ -175,6 +176,8 @@ namespace mini_em {
updateParams("solverMueLuTruncated.xml", lin_solver_pl, comm, out);
if (preferTPLs)
updateParams("solverMueLuTPL.xml", lin_solver_pl, comm, out);
if (useBarriers)
updateParams("solverMueLuBarrier.xml", lin_solver_pl, comm, out);
if (basis_order > 1) {
RCP<Teuchos::ParameterList> lin_solver_pl_lo = lin_solver_pl;
lin_solver_pl = rcp(new Teuchos::ParameterList("Linear Solver"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ namespace mini_em {
std::string &xml,
int basis_order,
const bool preferTPLs = false,
const bool useBarriers = false,
const bool truncateMueLuHierarchy = false);

void setClosureParameters(physicsType physics,
Expand Down
15 changes: 13 additions & 2 deletions packages/panzer/mini-em/example/BlockPrec/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ int main_(Teuchos::CommandLineProcessor &clp, int argc,char * argv[])
solverType solverValues[5] = {AUGMENTATION, MUELU, ML, CG, GMRES};
const char * solverNames[5] = {"Augmentation", "MueLu", "ML", "CG", "GMRES"};
bool preferTPLs = false;
bool useBarriers = false;
bool truncateMueLuHierarchy = false;
solverType solver = MUELU;
int numTimeSteps = 1;
Expand Down Expand Up @@ -159,6 +160,7 @@ int main_(Teuchos::CommandLineProcessor &clp, int argc,char * argv[])
clp.setOption("solverFile",&xml,"XML file with the solver params");
clp.setOption<solverType>("solver",&solver,5,solverValues,solverNames,"Solver that is used");
clp.setOption("tpl", "no-tpl", &preferTPLs, "Prefer TPL usage over fused kernels");
clp.setOption("barriers", "no-barriers", &useBarriers, "Use barriers in the solver");
clp.setOption("truncateMueLuHierarchy", "no-truncateMueLuHierarchy", &truncateMueLuHierarchy, "Truncate the MueLu hierarchy");
clp.setOption("numTimeSteps",&numTimeSteps);
clp.setOption("finalTime",&finalTime);
Expand Down Expand Up @@ -304,7 +306,7 @@ int main_(Teuchos::CommandLineProcessor &clp, int argc,char * argv[])
throw;
}

RCP<Teuchos::ParameterList> lin_solver_pl = mini_em::getSolverParameters(linAlgebra, physics, solver, dim, comm, out, xml, basis_order, preferTPLs, truncateMueLuHierarchy);
RCP<Teuchos::ParameterList> lin_solver_pl = mini_em::getSolverParameters(linAlgebra, physics, solver, dim, comm, out, xml, basis_order, preferTPLs, useBarriers, truncateMueLuHierarchy);

if (lin_solver_pl->sublist("Preconditioner Types").isSublist("Teko") &&
lin_solver_pl->sublist("Preconditioner Types").sublist("Teko").isSublist("Inverse Factory Library")) {
Expand Down Expand Up @@ -678,11 +680,20 @@ int main_(Teuchos::CommandLineProcessor &clp, int argc,char * argv[])
// solve
if (doSolveTimings)
for (int rep = 0; rep < numReps; rep++) {
if (useBarriers) {
Teuchos::TimeMonitor solve_barrier_timer(*Teuchos::TimeMonitor::getNewTimer(std::string("Mini-EM: Solve barrier")));
comm->barrier();
}
Thyra::assign(correction_vec.ptr(), zero);
jacobian->solve(Thyra::NOTRANS, *residual, correction_vec.ptr());
}
else
else {
if (useBarriers) {
Teuchos::TimeMonitor solve_barrier_timer(*Teuchos::TimeMonitor::getNewTimer(std::string("Mini-EM: Solve barrier")));
comm->barrier();
}
jacobian->solve(Thyra::NOTRANS, *residual, correction_vec.ptr());
}
Thyra::V_StVpStV(solution_vec.ptr(), one, *solution_vec, -one, *correction_vec);

// compute responses
Expand Down
22 changes: 22 additions & 0 deletions packages/panzer/mini-em/example/BlockPrec/solverMueLuBarrier.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<ParameterList name="Linear Solver">
<ParameterList name="Preconditioner Types">
<ParameterList name="Teko">
<ParameterList name="Inverse Factory Library">
<ParameterList name="Maxwell">

<ParameterList name="S_E Preconditioner">
<ParameterList name="Preconditioner Types">
<ParameterList name="MueLuRefMaxwell">

<Parameter name="sync timers" type="bool" value="true"/>

</ParameterList>

</ParameterList>
</ParameterList>

</ParameterList>
</ParameterList>
</ParameterList>
</ParameterList>
</ParameterList>

0 comments on commit 1669e5d

Please sign in to comment.