Skip to content

Commit

Permalink
Merge 'trilinos/Trilinos:develop' (19e4fb2) into 'tcad-charon/Trilinos:develop' (19158f2).
Browse files Browse the repository at this point in the history

* trilinos-develop:
  Add back TeuchosNumerics_DISABLE_STEQR_TEST=ON (trilinos#2410, trilinos#6166)
  MueLu: fixed build error
  kokkos-kernels: update gcc check for c++14 workaround macro
  Ifpack2 ScaledDampedResidual: Cache vectors
  Tpetra/MueLu: switched performance tests to StackedTimer
  • Loading branch information
Jenkins Pipeline committed Oct 27, 2019
2 parents 19158f2 + 19e4fb2 commit e3f515c
Show file tree
Hide file tree
Showing 13 changed files with 94 additions and 55 deletions.
3 changes: 3 additions & 0 deletions cmake/std/atdm/waterman/tweaks/Tweaks.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ ATDM_SET_ENABLE(PanzerAdaptersIOSS_tIOSSConnManager3_MPI_3_DISABLE ON)

IF (Trilinos_ENABLE_DEBUG)

# STEQR() test fails on IBM Power systems with current TPL setup (#2410, #6166)
ATDM_SET_ENABLE(TeuchosNumerics_DISABLE_STEQR_TEST ON)

# Disable Tempus tests that started timing out in debug builds when
# Trilinos_ENABLE_DEBUG=ON was set in PR #5970 (#6009)
ATDM_SET_ENABLE(Tempus_BackwardEuler_MPI_1_DISABLE ON)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ class ScaledDampedResidual {
std::unique_ptr<vector_type> X_colMap_;
std::unique_ptr<multivector_type> V1_;

typename multivector_type::dual_view_type::t_host viewW_, viewB_, viewX_;
Teuchos::RCP<vector_type> W_vec_, B_vec_, X_vec_;

// Do the Import, if needed, and return the column Map version of X.
vector_type&
importVector (vector_type& X_domMap);
Expand Down
17 changes: 13 additions & 4 deletions packages/ifpack2/src/Ifpack2_Details_ScaledDampedResidual_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,11 +359,20 @@ compute (multivector_type& W,

if (canFuse (B)) {
// "nonconst" here has no effect other than on the return type.
RCP<vector_type> W_vec = W.getVectorNonConst (0);
RCP<vector_type> B_vec = B.getVectorNonConst (0);
RCP<vector_type> X_vec = X.getVectorNonConst (0);
if (W_vec_.is_null() || W.getLocalViewHost().data() != viewW_.data()) {
viewW_ = W.getLocalViewHost();
W_vec_ = W.getVectorNonConst (0);
}
if (B_vec_.is_null() || B.getLocalViewHost().data() != viewB_.data()) {
viewB_ = B.getLocalViewHost();
B_vec_ = B.getVectorNonConst (0);
}
if (X_vec_.is_null() || X.getLocalViewHost().data() != viewX_.data()) {
viewX_ = X.getLocalViewHost();
X_vec_ = X.getVectorNonConst (0);
}
TEUCHOS_ASSERT( ! A_crs_.is_null () );
fusedCase (*W_vec, alpha, D_inv, *B_vec, *A_crs_, *X_vec, beta);
fusedCase (*W_vec_, alpha, D_inv, *B_vec_, *A_crs_, *X_vec_, beta);
}
else {
TEUCHOS_ASSERT( ! A_op_.is_null () );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@

#ifdef KOKKOS_ENABLE_CXX14
#ifdef KOKKOS_COMPILER_GNU
#if KOKKOS_COMPILER_GNU<=720
#if KOKKOS_COMPILER_GNU<=740
#define KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND
#endif
#endif
Expand Down
16 changes: 10 additions & 6 deletions packages/muelu/test/scaling/MatrixMatrixMultiply.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
#endif

#include <Teuchos_StandardCatchMacros.hpp>
#include <Teuchos_TimeMonitor.hpp>
#include <Teuchos_StackedTimer.hpp>

#include <Xpetra_MultiVectorFactory.hpp>
#include <Xpetra_MatrixMatrix.hpp>
Expand Down Expand Up @@ -96,6 +98,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int ar
using Teuchos::rcp;
using Teuchos::Time;
using Teuchos::TimeMonitor;
using Teuchos::StackedTimer;
using namespace MueLuTests;

RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
Expand Down Expand Up @@ -133,8 +136,10 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int ar
unsigned int seed = generateSeed(*comm, optSeed);
Teuchos::ScalarTraits<SC>::seedrandom(seed);

RCP<StackedTimer> timer = rcp(new StackedTimer("MatrixMatrix Multiply: Total"));
TimeMonitor::setStackedTimer(timer);

for (int jj=0; jj<optNmults; ++jj) {
TimeMonitor globalTimeMonitor(*TimeMonitor::getNewTimer("MatrixMatrixMultiplyTest: S - Global Time"));

RCP<Matrix> A;
RCP<Matrix> B;
Expand Down Expand Up @@ -199,11 +204,10 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int ar

} //for (int jj=0; jj<optNmults; ++jj)

if (optTimings) {
Teuchos::TableFormat &format = TimeMonitor::format();
format.setPrecision(25);
TimeMonitor::summarize();
}
timer->stopBaseTimer();
StackedTimer::OutputOptions options;
options.print_warnings = false;
timer->report(std::cout, comm, options);

if (comm->getRank() == 0)
std::cout << "End Result: TEST PASSED";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ ENDIF()
TRIBITS_ADD_TEST(
FEMAssembly
NAME Performance_StrongScaling_FEMAssembly_InsertGlobalIndicesFESPKokkos
ARGS "--with-insert-global-indices-fe --with-StaticProfile --num-elements-x=480 --num-elements-y=480 --kokkos"
ARGS "--with-insert-global-indices-fe --with-StaticProfile --num-elements-x=8192 --num-elements-y=8192 --kokkos"
COMM mpi
NUM_MPI_PROCS 1
STANDARD_PASS_OUTPUT
Expand All @@ -109,7 +109,7 @@ TRIBITS_ADD_TEST(
TRIBITS_ADD_TEST(
FEMAssembly
NAME Performance_StrongScaling_FEMAssembly_InsertGlobalIndicesFESPKokkos
ARGS "--with-insert-global-indices-fe --with-StaticProfile --num-elements-x=480 --num-elements-y=480 --kokkos"
ARGS "--with-insert-global-indices-fe --with-StaticProfile --num-elements-x=8192 --num-elements-y=8192 --kokkos"
COMM mpi
NUM_MPI_PROCS 4
STANDARD_PASS_OUTPUT
Expand All @@ -120,7 +120,7 @@ TRIBITS_ADD_TEST(
TRIBITS_ADD_TEST(
FEMAssembly
NAME Performance_StrongScaling_FEMAssembly_InsertGlobalIndicesFESPKokkos
ARGS "--with-insert-global-indices-fe --with-StaticProfile --num-elements-x=480 --num-elements-y=480 --kokkos"
ARGS "--with-insert-global-indices-fe --with-StaticProfile --num-elements-x=8192 --num-elements-y=8192 --kokkos"
COMM mpi
NUM_MPI_PROCS 9
STANDARD_PASS_OUTPUT
Expand All @@ -131,7 +131,7 @@ TRIBITS_ADD_TEST(
TRIBITS_ADD_TEST(
FEMAssembly
NAME Performance_StrongScaling_FEMAssembly_InsertGlobalIndicesFESPKokkos
ARGS "--with-insert-global-indices-fe --with-StaticProfile --num-elements-x=480 --num-elements-y=480 --kokkos"
ARGS "--with-insert-global-indices-fe --with-StaticProfile --num-elements-x=8192 --num-elements-y=8192 --kokkos"
COMM mpi
NUM_MPI_PROCS 16
STANDARD_PASS_OUTPUT
Expand All @@ -142,7 +142,7 @@ TRIBITS_ADD_TEST(
TRIBITS_ADD_TEST(
FEMAssembly
NAME Performance_StrongScaling_FEMAssembly_InsertGlobalIndicesFESPKokkos
ARGS "--with-insert-global-indices-fe --with-StaticProfile --num-elements-x=480 --num-elements-y=480 --kokkos"
ARGS "--with-insert-global-indices-fe --with-StaticProfile --num-elements-x=8192 --num-elements-y=8192 --kokkos"
COMM mpi
NUM_MPI_PROCS 25
STANDARD_PASS_OUTPUT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ int executeInsertGlobalIndicesDP_(const Teuchos::RCP<const Teuchos::Comm<int> >&

auto owned_element_to_node_ids = mesh.getOwnedElementToNode();

RCP<TimeMonitor> timerGlobal = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("X) Global")));
Teuchos::TimeMonitor::getStackedTimer()->startBaseTimer();
RCP<TimeMonitor> timerElementLoopGraph = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("1) ElementLoop (Graph)")));

RCP<crs_graph_type> crs_graph = rcp(new crs_graph_type(row_map, 0));
Expand Down Expand Up @@ -279,7 +279,7 @@ int executeInsertGlobalIndicesDP_(const Teuchos::RCP<const Teuchos::Comm<int> >&
}


timerGlobal = Teuchos::null;
Teuchos::TimeMonitor::getStackedTimer()->stopBaseTimer();

// Print out crs_matrix details.
if(opts.verbose) crs_matrix->describe(out, Teuchos::VERB_EXTREME);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ int executeInsertGlobalIndicesFESP_(const Teuchos::RCP<const Teuchos::Comm<int>

auto owned_element_to_node_ids = mesh.getOwnedElementToNode();

RCP<TimeMonitor> timerGlobal = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("X) Global")));
Teuchos::TimeMonitor::getStackedTimer()->startBaseTimer();
RCP<TimeMonitor> timerElementLoopGraph = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("1) ElementLoop (Graph)")));

RCP<fe_graph_type> fe_graph = rcp(new fe_graph_type(row_map, owned_plus_shared_map, 16));
Expand Down Expand Up @@ -301,8 +301,7 @@ int executeInsertGlobalIndicesFESP_(const Teuchos::RCP<const Teuchos::Comm<int>
Tpetra::endFill(*rhs);
}


timerGlobal = Teuchos::null;
Teuchos::TimeMonitor::getStackedTimer()->stopBaseTimer();

// Print out fe_matrix details.
if(opts.verbose) fe_matrix->describe(out, Teuchos::VERB_EXTREME);
Expand Down Expand Up @@ -379,7 +378,7 @@ int executeInsertGlobalIndicesFESPKokkos_(const Teuchos::RCP<const Teuchos::Comm

auto owned_element_to_node_ids = mesh.getOwnedElementToNode();

RCP<TimeMonitor> timerGlobal = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("X) Global")));
Teuchos::TimeMonitor::getStackedTimer()->startBaseTimer();

RCP<fe_graph_type> fe_graph =
rcp (new fe_graph_type (row_map, owned_plus_shared_map, 16));
Expand Down Expand Up @@ -544,8 +543,7 @@ int executeInsertGlobalIndicesFESPKokkos_(const Teuchos::RCP<const Teuchos::Comm
Tpetra::endFill(*rhs);
}


timerGlobal = Teuchos::null;
Teuchos::TimeMonitor::getStackedTimer()->stopBaseTimer();

// Print out fe_matrix details.
if(opts.verbose) fe_matrix->describe(out, Teuchos::VERB_EXTREME);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ int executeLocalElementLoopDP_(const Teuchos::RCP<const Teuchos::Comm<int> >& co

auto owned_element_to_node_ids = mesh.getOwnedElementToNode();

RCP<TimeMonitor> timerGlobal = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("X) Global")));
Teuchos::TimeMonitor::getStackedTimer()->startBaseTimer();
RCP<TimeMonitor> timerElementLoopGraph = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("1) ElementLoop (All Graph)")));

// Type-2 Assembly distinguishes owned and overlapping nodes.
Expand Down Expand Up @@ -335,7 +335,7 @@ int executeLocalElementLoopDP_(const Teuchos::RCP<const Teuchos::Comm<int> >& co
crs_matrix_owned->fillComplete();
}

timerGlobal = Teuchos::null;
Teuchos::TimeMonitor::getStackedTimer()->stopBaseTimer();

// Print out crs_matrix_owned and crs_matrix_overlapping details.
if(opts.verbose)
Expand Down Expand Up @@ -414,7 +414,7 @@ int executeLocalElementLoopDPKokkos_(const Teuchos::RCP<const Teuchos::Comm<int>

auto owned_element_to_node_ids = mesh.getOwnedElementToNode();

RCP<TimeMonitor> timerGlobal = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("X) Global")));
Teuchos::TimeMonitor::getStackedTimer()->startBaseTimer();
RCP<TimeMonitor> timerElementLoopGraph = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("1) ElementLoop (All Graph)")));

// Type-2 Assembly distinguishes owned and overlapping nodes.
Expand Down Expand Up @@ -622,7 +622,7 @@ int executeLocalElementLoopDPKokkos_(const Teuchos::RCP<const Teuchos::Comm<int>
crs_matrix_owned->fillComplete();
}

timerGlobal = Teuchos::null;
Teuchos::TimeMonitor::getStackedTimer()->stopBaseTimer();

// Print out crs_matrix_owned and crs_matrix_overlapping details.
if(opts.verbose)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ int executeTotalElementLoopDP_(const Teuchos::RCP<const Teuchos::Comm<int> >& co
auto owned_element_to_node_ids = mesh.getOwnedElementToNode();
auto ghost_element_to_node_ids = mesh.getGhostElementToNode();

RCP<TimeMonitor> timerGlobal = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("X) Global")));
Teuchos::TimeMonitor::getStackedTimer()->startBaseTimer();
RCP<TimeMonitor> timerElementLoopGraph = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("1) ElementLoop (Graph)")));

RCP<crs_graph_type> crs_graph = rcp(new crs_graph_type(row_map, 0));
Expand Down Expand Up @@ -333,7 +333,7 @@ int executeTotalElementLoopDP_(const Teuchos::RCP<const Teuchos::Comm<int> >& co
crs_matrix->fillComplete();
}

timerGlobal = Teuchos::null;
Teuchos::TimeMonitor::getStackedTimer()->stopBaseTimer();

// Print out crs_matrix details.
if(opts.verbose) crs_matrix->describe(out, Teuchos::VERB_EXTREME);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ int executeTotalElementLoopSP_(const Teuchos::RCP<const Teuchos::Comm<int> >& co
auto owned_element_to_node_ids = mesh.getOwnedElementToNode();
auto ghost_element_to_node_ids = mesh.getGhostElementToNode();

RCP<TimeMonitor> timerGlobal = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("X) Global")));
Teuchos::TimeMonitor::getStackedTimer()->startBaseTimer();
RCP<TimeMonitor> timerElementLoopGraph = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("1) ElementLoop (Graph)")));

RCP<crs_graph_type> crs_graph = rcp(new crs_graph_type(row_map, maxEntriesPerRow, Tpetra::StaticProfile));
Expand Down Expand Up @@ -353,7 +353,7 @@ int executeTotalElementLoopSP_(const Teuchos::RCP<const Teuchos::Comm<int> >& co
crs_matrix.describe (out, Teuchos::VERB_EXTREME);
}

timerGlobal = Teuchos::null;
Teuchos::TimeMonitor::getStackedTimer()->stopBaseTimer();

// Save crs_matrix as a MatrixMarket file.
if (opts.saveMM) {
Expand Down Expand Up @@ -431,7 +431,7 @@ executeTotalElementLoopSPKokkos_
auto owned_element_to_node_ids = mesh.getOwnedElementToNode();
auto ghost_element_to_node_ids = mesh.getGhostElementToNode();

RCP<TimeMonitor> timerGlobal = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("X) Global")));
Teuchos::TimeMonitor::getStackedTimer()->startBaseTimer();
RCP<TimeMonitor> timerElementLoopGraph = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("1) ElementLoop (Graph)")));

RCP<crs_graph_type> crs_graph = rcp(new crs_graph_type(row_map, maxEntriesPerRow, Tpetra::StaticProfile));
Expand Down Expand Up @@ -644,7 +644,7 @@ executeTotalElementLoopSPKokkos_
// Print out crs_matrix details.
if(opts.verbose) crs_matrix->describe(out, Teuchos::VERB_EXTREME);

timerGlobal = Teuchos::null;
Teuchos::TimeMonitor::getStackedTimer()->stopBaseTimer();

// Save crs_matrix as a MatrixMarket file.
if(opts.saveMM)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
#include "MatrixMarket_Tpetra.hpp"
#include "Teuchos_RCP.hpp"
#include "Teuchos_FancyOStream.hpp"
#include "Teuchos_TimeMonitor.hpp"
#include "Teuchos_StackedTimer.hpp"

#include "fem_assembly_commandLineOpts.hpp"
#include "fem_assembly_typedefs.hpp"
Expand All @@ -67,6 +69,9 @@ int main (int argc, char *argv[])
{
using std::endl;
using Teuchos::RCP;
using Teuchos::rcp;
using Teuchos::TimeMonitor;
using Teuchos::StackedTimer;

int status = EXIT_SUCCESS;

Expand Down Expand Up @@ -96,6 +101,13 @@ int main (int argc, char *argv[])
return status;
}

RCP<StackedTimer> timer = Teuchos::null;
if(opts.timing)
{
timer = rcp(new StackedTimer("X) Global", false));
TimeMonitor::setStackedTimer(timer);
}

// Entry point
if(opts.useStaticProfile)
{
Expand All @@ -114,8 +126,13 @@ int main (int argc, char *argv[])
status = EXIT_FAILURE;
}

// Print out timing results.
if(opts.timing) Teuchos::TimeMonitor::report(comm.ptr(), std::cout, "");
if(opts.timing)
{
// Note: the base timer was already stopped inside the execute...() assembly routine.
StackedTimer::OutputOptions timeReportOpts;
timeReportOpts.print_warnings = false;
timer->report(std::cout, comm, timeReportOpts);
}

// This tells the Trilinos test framework that the test passed.
if(EXIT_SUCCESS == comm->getRank()) out << "End Result: TEST PASSED" << endl;
Expand Down
Loading

0 comments on commit e3f515c

Please sign in to comment.