Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Argobots support #4421

Merged
merged 5 commits into from
Apr 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/include/mpitimpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1192,6 +1192,8 @@ extern MPID_Thread_mutex_t mpi_t_mutex;
do { \
int err_; \
MPIR_T_THREAD_CHECK_BEGIN \
MPID_Thread_init(&err_); \
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can argobot init be called multiple times?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. It is okay to call it multiple times (internally there's a refcount).
If it wouldn't be the case, MPL_thread_init() will take care of it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This concern is mentioned in src/mpl/include/mpl_thread_argobots.h.

MPIR_Assert(err_ == 0); \
MPID_Thread_mutex_create(&mpi_t_mutex, &err_); \
MPIR_Assert(err_ == 0); \
MPIR_T_THREAD_CHECK_END \
Expand All @@ -1203,6 +1205,8 @@ extern MPID_Thread_mutex_t mpi_t_mutex;
MPIR_T_THREAD_CHECK_BEGIN \
MPID_Thread_mutex_destroy(&mpi_t_mutex, &err_); \
MPIR_Assert(err_ == 0); \
MPID_Thread_finalize(&err_); \
MPIR_Assert(err_ == 0); \
MPIR_T_THREAD_CHECK_END \
} while (0)

Expand Down
11 changes: 7 additions & 4 deletions src/mpl/include/mpl_thread_argobots.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,16 @@

typedef ABT_mutex MPL_thread_mutex_t;
typedef ABT_cond MPL_thread_cond_t;
typedef ABT_thread_id MPL_thread_id_t;
typedef ABT_thread MPL_thread_id_t;
typedef ABT_key MPL_thread_tls_key_t;

/* ======================================================================
* Creation and misc
* ======================================================================*/

/* MPL_thread_init()/MPL_thread_finalize() can be called in a nested manner
* (e.g., MPI_T_init_thread() and MPI_Init_thread()), but Argobots internally
* maintains a counter so it is okay. */
#define MPL_thread_init(err_ptr_) \
do { \
int err__; \
Expand All @@ -50,9 +53,9 @@ typedef void (*MPL_thread_func_t) (void *data);
void MPL_thread_create(MPL_thread_func_t func, void *data, MPL_thread_id_t * idp, int *errp);

#define MPL_thread_exit()
#define MPL_thread_self(id_) ABT_thread_self_id(id_)
#define MPL_thread_join(id_) ABT_thread_join(id_)
#define MPL_thread_same(id1_, id2_, same_) ABT_thread_equal(id1_, id2_, same_)
#define MPL_thread_self(idp_) ABT_thread_self(idp_)
#define MPL_thread_join(idp_) ABT_thread_free(idp_)
#define MPL_thread_same(idp1_, idp2_, same_) ABT_thread_equal(*idp1_, *idp2_, same_)

/* ======================================================================
* Scheduling
Expand Down
1 change: 1 addition & 0 deletions test/mpi/threads/comm/idup_deadlock.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ MTEST_THREAD_RETURN_TYPE test_comm_dup(void *arg)
wait = 0;
for (i = 0; i < NUM_THREADS; i++)
wait += start_idup[i];
MTest_thread_yield();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@shintaro-iwasaki MTest_thread_yield() is not in a header file that is included in idup_deadlock.c, causing a compiler warning. Could you fix it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops I missed it. #4503 should fix it. Thanks!

} while (wait > NUM_THREADS / 2);
}

Expand Down
9 changes: 9 additions & 0 deletions test/mpi/threads/spawn/th_taskmaster.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ int main(int argc, char *argv[])
#endif /* USE_THREADS */

#ifdef USE_THREADS
/* Initialize the thread package. It should not be initialized via
* MTest_Init_thread since MTest_Finalize cannot be called. */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is because both MTest_Init_thread and MTest_Finalize are not called rather than cannot be called, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right. I will change the comment. Thanks a lot for reviewing this PR!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hzhou I fixed the comment.

MTest_init_thread_pkg();

/* We need different communicators per thread */
for (i = 0; i < DEFAULT_TASK_WINDOW; i++)
CHECK_SUCCESS(MPI_Comm_dup(MPI_COMM_WORLD, &th_comms[i]));
Expand All @@ -117,6 +121,9 @@ int main(int argc, char *argv[])

for (i = 0; i < DEFAULT_TASK_WINDOW; i++)
CHECK_SUCCESS(MPI_Comm_free(&th_comms[i]));

/* Release the thread package. */
MTest_finalize_thread_pkg();
#else
/* Directly spawn a child process to perform each task */
for (i = 0; i < tasks;) {
Expand All @@ -141,6 +148,8 @@ int main(int argc, char *argv[])
}

fn_exit:
/* Do not call MTest_Finalize (and thus MTest_Init) to avoid printing extra
* "No Errors" as many as spawned MPI processes. */
MPI_Finalize();

return 0;
Expand Down
21 changes: 20 additions & 1 deletion test/mpi/util/mtest_thread_abt.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,27 @@ extern ABT_pool pools[MTEST_NUM_XSTREAMS];
int MTest_Start_thread(MTEST_THREAD_RETURN_TYPE(*fn) (void *p), void *arg)
{
int ret;
ABT_thread_attr thread_attr;

if (nthreads >= MTEST_MAX_THREADS) {
fprintf(stderr, "Too many threads already created: max is %d\n", MTEST_MAX_THREADS);
return 1;
}

/* Make stack size large. */
ret = ABT_thread_attr_create(&thread_attr);
MTEST_ABT_ERROR(ret, "ABT_thread_attr_create");
ret = ABT_thread_attr_set_stacksize(thread_attr, 2 * 1024 * 1024);
MTEST_ABT_ERROR(ret, "ABT_thread_attr_set_stacksize");

/* We push threads to pools[0] and let the random work-stealing
* scheduler balance things out. */
ret = ABT_thread_create(pools[0], fn, arg, ABT_THREAD_ATTR_NULL, &threads[nthreads]);
ret = ABT_thread_create(pools[0], fn, arg, thread_attr, &threads[nthreads]);
MTEST_ABT_ERROR(ret, "ABT_thread_create");

ret = ABT_thread_attr_free(&thread_attr);
MTEST_ABT_ERROR(ret, "ABT_thread_attr_free");

nthreads++;

return 0;
Expand Down Expand Up @@ -111,6 +122,14 @@ int MTest_thread_lock_free(MTEST_THREAD_LOCK_TYPE * lock)
return 0;
}

int MTest_thread_yield(void)
{
int ret;
ret = ABT_thread_yield();
MTEST_ABT_ERROR(ret, "ABT_thread_yield");
return 0;
}

/* -------------------------------------------- */
#define HAVE_MTEST_THREAD_BARRIER 1

Expand Down
5 changes: 5 additions & 0 deletions test/mpi/util/mtest_thread_pthread.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ int MTest_thread_lock_free(MTEST_THREAD_LOCK_TYPE * lock)
return err;
}

int MTest_thread_yield(void)
{
return 0;
}

/*----------------------------------------------------------------*/
#if defined(HAVE_PTHREAD_BARRIER_INIT)

Expand Down
5 changes: 5 additions & 0 deletions test/mpi/util/mtest_thread_win.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,8 @@ int MTest_thread_lock_free(MTEST_THREAD_LOCK_TYPE * lock)
}
return MTestReturnValue(errs);
}

int MTest_thread_yield(void)
{
return 0;
}