Skip to content

Commit

Permalink
Merge pull request #7880 from trilinos/tasmit/cuda-launch-blocking
Browse files Browse the repository at this point in the history
Tpetra:  CUDA_LAUNCH_BLOCKING tests
  • Loading branch information
tasmith4 authored Aug 21, 2020
2 parents b00b1e9 + ae04f91 commit 1bd6168
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 1 deletion.
17 changes: 17 additions & 0 deletions packages/tpetra/core/src/Tpetra_Details_Behavior.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,10 @@ namespace { // (anonymous)
#endif // TPETRA_ASSUME_CUDA_AWARE_MPI
}

/// Compile-time default handed to the CUDA_LAUNCH_BLOCKING query as
/// its fallback value; always \c false.
constexpr bool cudaLaunchBlockingDefault ()
{
  return false;
}

/// Compile-time default for the hierarchical-unpack setting; always
/// \c true.
constexpr bool hierarchicalUnpackDefault ()
{
  return true;
}
Expand Down Expand Up @@ -351,6 +355,19 @@ bool Behavior::assumeMpiIsCudaAware ()
defaultValue);
}

/// Report the Boolean interpretation of the CUDA_LAUNCH_BLOCKING
/// environment variable, using cudaLaunchBlockingDefault() as the
/// fallback value passed to the lookup helper.
bool Behavior::cudaLaunchBlocking ()
{
  constexpr bool defaultValue = cudaLaunchBlockingDefault ();
  constexpr char envVarName[] = "CUDA_LAUNCH_BLOCKING";

  // Function-local statics keep the looked-up value and the
  // "already looked up" flag alive across calls; the helper receives
  // both and is responsible for updating them.
  static bool initialized_ = false;
  static bool value_ = defaultValue;

  const bool result =
    idempotentlyGetEnvironmentVariableAsBool (value_, initialized_,
                                              envVarName, defaultValue);
  return result;
}

int Behavior::TAFC_OptimizationCoreCount ()
{
// only call getenv once, save the value.
Expand Down
3 changes: 3 additions & 0 deletions packages/tpetra/core/src/Tpetra_Details_Behavior.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,9 @@ class Behavior {
/// For a discussion, see Trilinos GitHub issues #1571 and #1088.
static bool assumeMpiIsCudaAware ();

/// \brief Whether the CUDA_LAUNCH_BLOCKING environment variable is
///   set to a true value.
///
/// Setting CUDA_LAUNCH_BLOCKING to "1" makes this return true and
/// setting it to "0" makes it return false, so this is not merely a
/// test for the variable's presence.  The compiled-in default value
/// is false.
static bool cudaLaunchBlocking ();

/// \brief MPI process count above which
/// Tpetra::CrsMatrix::transferAndFillComplete will attempt to do
/// advanced neighbor discovery.
Expand Down
3 changes: 3 additions & 0 deletions packages/tpetra/core/test/Behavior/Behavior_Off.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ TEUCHOS_STATIC_SETUP()
setenv("TPETRA_DEBUG", "OFF", 1);
setenv("TPETRA_VERBOSE", "OFF", 1);
setenv("TPETRA_ASSUME_CUDA_AWARE_MPI", "OFF", 1);
setenv("CUDA_LAUNCH_BLOCKING", "0", 1);
}

TEUCHOS_UNIT_TEST(Behavior, Off)
Expand All @@ -88,5 +89,7 @@ TEUCHOS_UNIT_TEST(Behavior, Off)
// so any query on TPETRA_ASSUME_CUDA_AWARE_MPI should evaluate to false.
bool cuda_aware_mpi = Tpetra::Details::Behavior::assumeMpiIsCudaAware();
TEUCHOS_TEST_ASSERT(!cuda_aware_mpi, out, success);

TEST_ASSERT(!Tpetra::Details::Behavior::cudaLaunchBlocking());
}
} // namespace (anonymous)
3 changes: 3 additions & 0 deletions packages/tpetra/core/test/Behavior/Behavior_On.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ TEUCHOS_STATIC_SETUP()
setenv("TPETRA_DEBUG", "ON", 1);
setenv("TPETRA_VERBOSE", "ON", 1);
setenv("TPETRA_ASSUME_CUDA_AWARE_MPI", "ON", 1);
setenv("CUDA_LAUNCH_BLOCKING", "1", 1);
}

TEUCHOS_UNIT_TEST(Behavior, On)
Expand All @@ -88,5 +89,7 @@ TEUCHOS_UNIT_TEST(Behavior, On)
// so any query on TPETRA_ASSUME_CUDA_AWARE_MPI should evaluate to true
bool cuda_aware_mpi = Tpetra::Details::Behavior::assumeMpiIsCudaAware();
TEUCHOS_TEST_ASSERT(cuda_aware_mpi, out, success);

TEST_ASSERT(Tpetra::Details::Behavior::cudaLaunchBlocking());
}
} // namespace (anonymous)
64 changes: 64 additions & 0 deletions packages/tpetra/core/test/PerformanceCGSolve/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,70 @@ IF (Tpetra_INST_DOUBLE)
CATEGORIES PERFORMANCE
)

IF (Tpetra_ENABLE_CUDA)
  MESSAGE(STATUS "Tpetra: Performance-CGSolve CUDA_LAUNCH_BLOCKING test ENABLED")

  # Register the CG solve performance test with CUDA_LAUNCH_BLOCKING=1
  # once per MPI process count.  Every registration is identical apart
  # from NUM_MPI_PROCS, so a single loop replaces the repeated calls.
  FOREACH (cgSolveNumProcs 1 4 9 16 25)
    TRIBITS_ADD_TEST(
      Performance-CGSolve
      NAME Performance_StrongScaling_CGSolve_CUDA_LAUNCH_BLOCKING
      ARGS "--size=200"
      COMM mpi
      NUM_MPI_PROCS ${cgSolveNumProcs}
      ENVIRONMENT CUDA_LAUNCH_BLOCKING=1
      STANDARD_PASS_OUTPUT
      RUN_SERIAL
      CATEGORIES PERFORMANCE
      )
  ENDFOREACH()
ENDIF()

ENDIF()

#TRIBITS_COPY_FILES_TO_BINARY_DIR(CopyXmlForTpetraPerfCgSolve
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,11 @@ int run (int argc, char *argv[])
StackedTimer::OutputOptions options;
options.print_warnings = false;
timer->report(std::cout, comm, options);
auto xmlOut = timer->reportWatchrXML(std::string("Tpetra CGSolve ") + std::to_string(comm->getSize()) + " ranks", comm);

std::string testBaseName = "Tpetra CGSolve ";
if (Tpetra::Details::Behavior::cudaLaunchBlocking()) testBaseName += "CUDA_LAUNCH_BLOCKING ";

auto xmlOut = timer->reportWatchrXML(testBaseName + std::to_string(comm->getSize()) + " ranks", comm);
if(myRank == 0)
{
if(xmlOut.length())
Expand Down

0 comments on commit 1bd6168

Please sign in to comment.