diff --git a/test/mpi/configure.ac b/test/mpi/configure.ac
index 00fe982b108..c81c6d6d66c 100644
--- a/test/mpi/configure.ac
+++ b/test/mpi/configure.ac
@@ -1692,5 +1692,6 @@ AC_OUTPUT(maint/testmerge \
           impls/mpich/threads/pt2pt/Makefile \
           impls/mpich/cuda/Makefile \
           impls/mpich/misc/Makefile \
+          impls/mpich/ulfm/Makefile \
 )
 
diff --git a/test/mpi/impls/mpich/Makefile.am b/test/mpi/impls/mpich/Makefile.am
index 1f04f19665d..66ad0e68b08 100644
--- a/test/mpi/impls/mpich/Makefile.am
+++ b/test/mpi/impls/mpich/Makefile.am
@@ -7,5 +7,5 @@ include $(top_srcdir)/Makefile_single.mtest
 
 EXTRA_DIST = testlist.in
 
-static_subdirs = mpi_t comm misc
+static_subdirs = mpi_t comm misc ulfm
 SUBDIRS = $(static_subdirs) $(threadsdir) $(cudadir)
diff --git a/test/mpi/impls/mpich/ulfm/Makefile.am b/test/mpi/impls/mpich/ulfm/Makefile.am
new file mode 100644
index 00000000000..a1099d2fa42
--- /dev/null
+++ b/test/mpi/impls/mpich/ulfm/Makefile.am
@@ -0,0 +1,11 @@
+##
+## Copyright (C) by Argonne National Laboratory
+##     See COPYRIGHT in top-level directory
+##
+
+include $(top_srcdir)/Makefile_single.mtest
+
+EXTRA_DIST = testlist
+
+noinst_PROGRAMS = \
+    get_failed
diff --git a/test/mpi/impls/mpich/ulfm/get_failed.c b/test/mpi/impls/mpich/ulfm/get_failed.c
new file mode 100644
--- /dev/null
+++ b/test/mpi/impls/mpich/ulfm/get_failed.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) by Argonne National Laboratory
+ *     See COPYRIGHT in top-level directory
+ */
+
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * This test makes sure that after a failure, the correct group of failed
+ * processes is returned from MPIX_Comm_get_failed.
+ */
+
+/* Number of characters exchanged between rank 2 and rank 0. */
+#define MSG_LEN 10
+
+/* Print the world rank of every member of failed_grp to stderr. */
+static void print_failed_ranks(MPI_Group failed_grp, MPI_Group world_grp)
+{
+    int nfailed;
+
+    MPI_Group_size(failed_grp, &nfailed);
+    for (int i = 0; i < nfailed; i++) {
+        int world_rank;
+
+        /* Translate one rank at a time so that no fixed-size scratch array
+         * can overflow when more processes than expected are reported. */
+        MPI_Group_translate_ranks(failed_grp, 1, &i, world_grp, &world_rank);
+        fprintf(stderr, "DEAD: %d\n", world_rank);
+    }
+}
+
+/*
+ * Fetch the failed group of MPI_COMM_WORLD and abort unless it is identical
+ * to the group made of the world ranks expected[0..nexpected-1].
+ * "which" prefixes the diagnostic ("First", "Second").
+ */
+static void check_failed_group(MPI_Group world_grp, int nexpected, const int expected[],
+                               const char *which)
+{
+    int err, result;
+    MPI_Group failed_grp, expected_grp;
+
+    err = MPIX_Comm_get_failed(MPI_COMM_WORLD, &failed_grp);
+    if (MPI_SUCCESS != err) {
+        int ec, len;
+        char error[MPI_MAX_ERROR_STRING];
+
+        MPI_Error_class(err, &ec);
+        /* Use a dedicated length variable instead of reusing unrelated state. */
+        MPI_Error_string(err, error, &len);
+        fprintf(stderr, "MPIX_Comm_get_failed returned an error: %d\n%s", ec, error);
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+
+    MPI_Group_incl(world_grp, nexpected, expected, &expected_grp);
+    MPI_Group_compare(expected_grp, failed_grp, &result);
+    if (MPI_IDENT != result) {
+        fprintf(stderr, "%s failed group contains incorrect processes\n", which);
+        print_failed_ranks(failed_grp, world_grp);
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+    MPI_Group_free(&failed_grp);
+    MPI_Group_free(&expected_grp);
+}
+
+int main(int argc, char **argv)
+{
+    int rank, size, err;
+    /* One extra byte keeps the buffer NUL-terminated; only MSG_LEN chars travel. */
+    char buf[MSG_LEN + 1] = " No errors";
+    MPI_Group world_grp;
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    if (size < 3) {
+        fprintf(stderr, "Must run with at least 3 processes\n");
+        MPI_Abort(MPI_COMM_WORLD, 1);
+    }
+
+    MPI_Comm_group(MPI_COMM_WORLD, &world_grp);
+    /* Errors must be returned, not fatal, so rank 0 can observe the failures. */
+    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
+
+    if (rank == 1) {
+        /* Simulate a process failure: exit without calling MPI_Finalize. */
+        exit(EXIT_FAILURE);
+    }
+
+    if (rank == 0) {
+        const int first_failed[] = { 1 };
+        const int second_failed[] = { 1, 2 };
+
+        err = MPI_Recv(buf, MSG_LEN, MPI_CHAR, 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+        if (MPI_SUCCESS == err) {
+            fprintf(stderr, "Expected a failure for receive from rank 1\n");
+            MPI_Abort(MPI_COMM_WORLD, 1);
+        }
+        check_failed_group(world_grp, 1, first_failed, "First");
+
+        /* Rank 2 sends exactly one message before exiting, so the first
+         * receive is expected to succeed and the second one to fail. */
+        err = MPI_Recv(buf, MSG_LEN, MPI_CHAR, 2, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+        if (MPI_SUCCESS != err) {
+            fprintf(stderr, "First receive failed\n");
+            MPI_Abort(MPI_COMM_WORLD, 1);
+        }
+        err = MPI_Recv(buf, MSG_LEN, MPI_CHAR, 2, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+        if (MPI_SUCCESS == err) {
+            fprintf(stderr, "Expected a failure for receive from rank 2\n");
+            MPI_Abort(MPI_COMM_WORLD, 1);
+        }
+        check_failed_group(world_grp, 2, second_failed, "Second");
+
+        fprintf(stdout, " No errors\n");
+    } else if (rank == 2) {
+        MPI_Ssend(buf, MSG_LEN, MPI_CHAR, 0, 0, MPI_COMM_WORLD);
+
+        exit(EXIT_FAILURE);
+    }
+
+    MPI_Group_free(&world_grp);
+    MPI_Finalize();
+    return 0;
+}
diff --git a/test/mpi/impls/mpich/ulfm/testlist b/test/mpi/impls/mpich/ulfm/testlist
new file mode 100644
index 00000000000..3cdd673c77f
--- /dev/null
+++ b/test/mpi/impls/mpich/ulfm/testlist
@@ -0,0 +1 @@
+get_failed 3 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors timeLimit=10