From 70b96b5ca252369981c22d9e9534244a53fa476e Mon Sep 17 00:00:00 2001 From: Shintaro Iwasaki Date: Sat, 11 Apr 2020 10:28:51 -0500 Subject: [PATCH 1/5] mpl/thread/argobots: fix MPL_thread_id_t implementation --- src/mpl/include/mpl_thread_argobots.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mpl/include/mpl_thread_argobots.h b/src/mpl/include/mpl_thread_argobots.h index 722f24f0300..67df784a4fe 100644 --- a/src/mpl/include/mpl_thread_argobots.h +++ b/src/mpl/include/mpl_thread_argobots.h @@ -18,7 +18,7 @@ typedef ABT_mutex MPL_thread_mutex_t; typedef ABT_cond MPL_thread_cond_t; -typedef ABT_thread_id MPL_thread_id_t; +typedef ABT_thread MPL_thread_id_t; typedef ABT_key MPL_thread_tls_key_t; /* ====================================================================== @@ -50,9 +50,9 @@ typedef void (*MPL_thread_func_t) (void *data); void MPL_thread_create(MPL_thread_func_t func, void *data, MPL_thread_id_t * idp, int *errp); #define MPL_thread_exit() -#define MPL_thread_self(id_) ABT_thread_self_id(id_) -#define MPL_thread_join(id_) ABT_thread_join(id_) -#define MPL_thread_same(id1_, id2_, same_) ABT_thread_equal(id1_, id2_, same_) +#define MPL_thread_self(idp_) ABT_thread_self(idp_) +#define MPL_thread_join(idp_) ABT_thread_free(idp_) +#define MPL_thread_same(idp1_, idp2_, same_) ABT_thread_equal(*idp1_, *idp2_, same_) /* ====================================================================== * Scheduling From a0bde252c3d84785d9818baf6205fecbebdc6f3b Mon Sep 17 00:00:00 2001 From: Shintaro Iwasaki Date: Sat, 11 Apr 2020 10:29:00 -0500 Subject: [PATCH 2/5] mpit: init/finalize mpl/thread in MPI_T_init_thread()/_finalize() When multithreaded MPI is enabled, MPI_T_init_thread() needs to initialize a mutex, mpi_t_mutex. This mutex should be created after initializing MPL/thread, which is especially necessary when the underlying threads are not Pthreads (e.g., Argobots threads). This patch adds initialization and finalization of MPL/thread in MPI_T_init_thread() and MPI_T_finalize(). --- src/include/mpitimpl.h | 4 ++++ src/mpl/include/mpl_thread_argobots.h | 3 +++ 2 files changed, 7 insertions(+) diff --git a/src/include/mpitimpl.h b/src/include/mpitimpl.h index e281ba5384d..2e1352f26ca 100644 --- a/src/include/mpitimpl.h +++ b/src/include/mpitimpl.h @@ -1192,6 +1192,8 @@ extern MPID_Thread_mutex_t mpi_t_mutex; do { \ int err_; \ MPIR_T_THREAD_CHECK_BEGIN \ + MPID_Thread_init(&err_); \ + MPIR_Assert(err_ == 0); \ MPID_Thread_mutex_create(&mpi_t_mutex, &err_); \ MPIR_Assert(err_ == 0); \ MPIR_T_THREAD_CHECK_END \ @@ -1203,6 +1205,8 @@ extern MPID_Thread_mutex_t mpi_t_mutex; MPIR_T_THREAD_CHECK_BEGIN \ MPID_Thread_mutex_destroy(&mpi_t_mutex, &err_); \ MPIR_Assert(err_ == 0); \ + MPID_Thread_finalize(&err_); \ + MPIR_Assert(err_ == 0); \ MPIR_T_THREAD_CHECK_END \ } while (0) diff --git a/src/mpl/include/mpl_thread_argobots.h b/src/mpl/include/mpl_thread_argobots.h index 67df784a4fe..d9ca9d57ad5 100644 --- a/src/mpl/include/mpl_thread_argobots.h +++ b/src/mpl/include/mpl_thread_argobots.h @@ -25,6 +25,9 @@ typedef ABT_key MPL_thread_tls_key_t; * Creation and misc * ======================================================================*/ +/* MPL_thread_init()/MPL_thread_finalize() can be called in a nested manner + * (e.g., MPI_T_init_thread() and MPI_Init_thread()), but Argobots internally + * maintains a counter so it is okay. */ #define MPL_thread_init(err_ptr_) \ do { \ int err__; \ From bd260dbbaf79b0fc046d45b69c45dfe4b9b18861 Mon Sep 17 00:00:00 2001 From: Shintaro Iwasaki Date: Sat, 11 Apr 2020 10:29:09 -0500 Subject: [PATCH 3/5] test: add scheduling point in threads/idup_deadlock When underlying threads are nonpreemptive (e.g., Argobots threads), a busy loop can cause a deadlock. This patch fixes a busy loop in threads/idup_deadlock by adding MTest_thread_yield(), a new function that is translated into yield when threads are nonpreemptive. --- test/mpi/threads/comm/idup_deadlock.c | 1 + test/mpi/util/mtest_thread_abt.h | 8 ++++++++ test/mpi/util/mtest_thread_pthread.h | 5 +++++ test/mpi/util/mtest_thread_win.h | 5 +++++ 4 files changed, 19 insertions(+) diff --git a/test/mpi/threads/comm/idup_deadlock.c b/test/mpi/threads/comm/idup_deadlock.c index 92dc7b418e0..5afd6f95222 100644 --- a/test/mpi/threads/comm/idup_deadlock.c +++ b/test/mpi/threads/comm/idup_deadlock.c @@ -41,6 +41,7 @@ MTEST_THREAD_RETURN_TYPE test_comm_dup(void *arg) wait = 0; for (i = 0; i < NUM_THREADS; i++) wait += start_idup[i]; + MTest_thread_yield(); } while (wait > NUM_THREADS / 2); } diff --git a/test/mpi/util/mtest_thread_abt.h b/test/mpi/util/mtest_thread_abt.h index e663e3745d8..d4998a9381c 100644 --- a/test/mpi/util/mtest_thread_abt.h +++ b/test/mpi/util/mtest_thread_abt.h @@ -111,6 +111,14 @@ int MTest_thread_lock_free(MTEST_THREAD_LOCK_TYPE * lock) return 0; } +int MTest_thread_yield(void) +{ + int ret; + ret = ABT_thread_yield(); + MTEST_ABT_ERROR(ret, "ABT_thread_yield"); + return 0; +} + /* -------------------------------------------- */ #define HAVE_MTEST_THREAD_BARRIER 1 diff --git a/test/mpi/util/mtest_thread_pthread.h b/test/mpi/util/mtest_thread_pthread.h index 62db23ab55e..a09acae46b9 100644 --- a/test/mpi/util/mtest_thread_pthread.h +++ b/test/mpi/util/mtest_thread_pthread.h @@ -80,6 +80,11 @@ int MTest_thread_lock_free(MTEST_THREAD_LOCK_TYPE * lock) return err; } +int MTest_thread_yield(void) +{ + return 0; +} + /*----------------------------------------------------------------*/ #if defined(HAVE_PTHREAD_BARRIER_INIT) diff --git a/test/mpi/util/mtest_thread_win.h b/test/mpi/util/mtest_thread_win.h index c4f8dbfb268..d051dc2a9b9 100644 --- a/test/mpi/util/mtest_thread_win.h +++ b/test/mpi/util/mtest_thread_win.h @@ -90,3 +90,8 @@ int MTest_thread_lock_free(MTEST_THREAD_LOCK_TYPE * lock) } return MTestReturnValue(errs); } + +int MTest_thread_yield(void) +{ + return 0; +} From db30d4f326f9257c572b9b6af9a6dba20f264eab Mon Sep 17 00:00:00 2001 From: Shintaro Iwasaki Date: Sat, 11 Apr 2020 10:29:16 -0500 Subject: [PATCH 4/5] test/mpi/argobots: use a larger stack size The default stack size of Argobots threads is too small and can cause a stack overflow bug. This patch fixes it. --- test/mpi/util/mtest_thread_abt.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/test/mpi/util/mtest_thread_abt.h b/test/mpi/util/mtest_thread_abt.h index d4998a9381c..702d6a93f10 100644 --- a/test/mpi/util/mtest_thread_abt.h +++ b/test/mpi/util/mtest_thread_abt.h @@ -53,16 +53,27 @@ extern ABT_pool pools[MTEST_NUM_XSTREAMS]; int MTest_Start_thread(MTEST_THREAD_RETURN_TYPE(*fn) (void *p), void *arg) { int ret; + ABT_thread_attr thread_attr; if (nthreads >= MTEST_MAX_THREADS) { fprintf(stderr, "Too many threads already created: max is %d\n", MTEST_MAX_THREADS); return 1; } + + /* Make stack size large. */ + ret = ABT_thread_attr_create(&thread_attr); + MTEST_ABT_ERROR(ret, "ABT_thread_attr_create"); + ret = ABT_thread_attr_set_stacksize(thread_attr, 2 * 1024 * 1024); + MTEST_ABT_ERROR(ret, "ABT_thread_attr_set_stacksize"); + /* We push threads to pools[0] and let the random work-stealing * scheduler balance things out. */ - ret = ABT_thread_create(pools[0], fn, arg, ABT_THREAD_ATTR_NULL, &threads[nthreads]); + ret = ABT_thread_create(pools[0], fn, arg, thread_attr, &threads[nthreads]); MTEST_ABT_ERROR(ret, "ABT_thread_create"); + ret = ABT_thread_attr_free(&thread_attr); + MTEST_ABT_ERROR(ret, "ABT_thread_attr_free"); + nthreads++; return 0; From f2206430d66f79119eeeb93521dafa5c7137ebb9 Mon Sep 17 00:00:00 2001 From: Shintaro Iwasaki Date: Sat, 11 Apr 2020 10:29:26 -0500 Subject: [PATCH 5/5] test/mpi/threads/spawn: initialize the thread package th_taskmaster.c uses MTest thread functions for forking and joining threads, but the thread package is not initialized. This patch fixes it. Note that we cannot explicitly call MTest_Init_thread() since the corresponding MTest_Finalize() prints "No Errors" as many as spawned MPI processes, breaking this test. --- test/mpi/threads/spawn/th_taskmaster.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/mpi/threads/spawn/th_taskmaster.c b/test/mpi/threads/spawn/th_taskmaster.c index e89374c4a72..46c31fab331 100644 --- a/test/mpi/threads/spawn/th_taskmaster.c +++ b/test/mpi/threads/spawn/th_taskmaster.c @@ -101,6 +101,10 @@ int main(int argc, char *argv[]) #endif /* USE_THREADS */ #ifdef USE_THREADS + /* Initialize the thread package. It should not be initialized via + * MTest_Init_thread since MTest_Finalize cannot be called. */ + MTest_init_thread_pkg(); + /* We need different communicators per thread */ for (i = 0; i < DEFAULT_TASK_WINDOW; i++) CHECK_SUCCESS(MPI_Comm_dup(MPI_COMM_WORLD, &th_comms[i])); @@ -117,6 +121,9 @@ int main(int argc, char *argv[]) for (i = 0; i < DEFAULT_TASK_WINDOW; i++) CHECK_SUCCESS(MPI_Comm_free(&th_comms[i])); + + /* Release the thread package. */ + MTest_finalize_thread_pkg(); #else /* Directly spawn a child process to perform each task */ for (i = 0; i < tasks;) { @@ -141,6 +148,8 @@ int main(int argc, char *argv[]) } fn_exit: + /* Do not call MTest_Finalize (and thus MTest_Init) to avoid printing extra + * "No Errors" as many as spawned MPI processes. */ MPI_Finalize(); return 0;