Skip to content

Commit

Permalink
test: add impls/mpich/ulfm/get_failed.c
Browse files Browse the repository at this point in the history
This is adapted from ft/failure_ack.c. The current ft tests are disabled
wholesale. Add it to impls so that it gets tested. MPIX functions need to be
in the impls folder anyway.
  • Loading branch information
hzhou committed Apr 2, 2022
1 parent 668a362 commit 8231dfd
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 1 deletion.
1 change: 1 addition & 0 deletions test/mpi/configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -1692,5 +1692,6 @@ AC_OUTPUT(maint/testmerge \
impls/mpich/threads/pt2pt/Makefile \
impls/mpich/cuda/Makefile \
impls/mpich/misc/Makefile \
impls/mpich/ulfm/Makefile \
)

2 changes: 1 addition & 1 deletion test/mpi/impls/mpich/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ include $(top_srcdir)/Makefile_single.mtest

EXTRA_DIST = testlist.in

static_subdirs = mpi_t comm misc
static_subdirs = mpi_t comm misc ulfm
SUBDIRS = $(static_subdirs) $(threadsdir) $(cudadir)
11 changes: 11 additions & 0 deletions test/mpi/impls/mpich/ulfm/Makfile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
##
## Copyright (C) by Argonne National Laboratory
## See COPYRIGHT in top-level directory
##

## Pull in the shared single-directory test build rules.
include $(top_srcdir)/Makefile_single.mtest

## testlist is a plain data file (not generated), so it must be listed
## explicitly to be included in distribution tarballs.
EXTRA_DIST = testlist

## Test programs built in this directory; the noinst_ prefix means they are
## built but not installed.
noinst_PROGRAMS = \
get_failed
115 changes: 115 additions & 0 deletions test/mpi/impls/mpich/ulfm/get_failed.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
* Copyright (C) by Argonne National Laboratory
* See COPYRIGHT in top-level directory
*/

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * This test makes sure that after a failure, the correct group of failed
 * processes is returned from MPIX_Comm_get_failed.
 *
 * Rank 1 exits right away and rank 2 exits after one synchronous send to
 * rank 0. Rank 0 observes each failure through a failing receive and then
 * compares the group returned by MPIX_Comm_get_failed against the expected
 * ranks ({1} first, then {1, 2}).
 */

/*
 * Print the error class and error string for an error code returned by
 * MPIX_Comm_get_failed, then abort the job.
 */
static void abort_get_failed_error(int err)
{
    int err_class;
    int msg_len;                /* required output of MPI_Error_string; otherwise unused */
    char msg[MPI_MAX_ERROR_STRING];

    MPI_Error_class(err, &err_class);
    MPI_Error_string(err, msg, &msg_len);
    fprintf(stderr, "MPIX_Comm_get_failed returned an error: %d\n%s", err_class, msg);
    MPI_Abort(MPI_COMM_WORLD, 1);
}

/*
 * Compare failed_grp against the group made of the nexpected world ranks in
 * expected[]. On mismatch, print "<which> failed group contains incorrect
 * processes", list every member of failed_grp by its rank in world_grp, and
 * abort the job.
 */
static void check_failed_group(MPI_Group world_grp, MPI_Group failed_grp,
                               int nexpected, const int expected[], const char *which)
{
    int result;
    MPI_Group expected_grp;

    MPI_Group_incl(world_grp, nexpected, expected, &expected_grp);
    MPI_Group_compare(expected_grp, failed_grp, &result);
    if (MPI_IDENT != result) {
        int nfailed;

        fprintf(stderr, "%s failed group contains incorrect processes\n", which);
        MPI_Group_size(failed_grp, &nfailed);
        /* Translate one rank at a time so that a failed group of any size can
         * be reported without relying on fixed-size scratch arrays. */
        for (int i = 0; i < nfailed; i++) {
            int world_rank;

            MPI_Group_translate_ranks(failed_grp, 1, &i, world_grp, &world_rank);
            fprintf(stderr, "DEAD: %d\n", world_rank);
        }
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    MPI_Group_free(&expected_grp);
}

int main(int argc, char **argv)
{
    int rank, size, err;
    /* Used only as a 10-byte message buffer; the initializer fills all 10
     * bytes, so there is no terminating NUL. */
    char buf[10] = " No errors";
    MPI_Group failed_grp, world_grp;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size < 3) {
        fprintf(stderr, "Must run with at least 3 processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Comm_group(MPI_COMM_WORLD, &world_grp);
    /* Errors must be returned to the caller so rank 0 can inspect them. */
    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

    if (rank == 1) {
        /* First failure: leave without communicating or finalizing. */
        exit(EXIT_FAILURE);
    }

    if (rank == 0) {
        const int first_failed[] = { 1 };
        const int second_failed[] = { 1, 2 };

        /* Rank 1 never sends, so this receive must report an error. */
        err = MPI_Recv(buf, 10, MPI_CHAR, 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS == err) {
            fprintf(stderr, "Expected a failure for receive from rank 1\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        err = MPIX_Comm_get_failed(MPI_COMM_WORLD, &failed_grp);
        if (MPI_SUCCESS != err) {
            abort_get_failed_error(err);
        }
        check_failed_group(world_grp, failed_grp, 1, first_failed, "First");
        MPI_Group_free(&failed_grp);

        /* Rank 2 sends exactly one message before exiting: the first receive
         * must succeed and the second one must fail. */
        err = MPI_Recv(buf, 10, MPI_CHAR, 2, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS != err) {
            fprintf(stderr, "First receive failed\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        err = MPI_Recv(buf, 10, MPI_CHAR, 2, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS == err) {
            fprintf(stderr, "Expected a failure for receive from rank 2\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }

        err = MPIX_Comm_get_failed(MPI_COMM_WORLD, &failed_grp);
        if (MPI_SUCCESS != err) {
            abort_get_failed_error(err);
        }
        check_failed_group(world_grp, failed_grp, 2, second_failed, "Second");
        MPI_Group_free(&failed_grp);

        fprintf(stdout, " No errors\n");
    } else if (rank == 2) {
        /* Second failure: exit after the synchronous send to rank 0 returns. */
        MPI_Ssend(buf, 10, MPI_CHAR, 0, 0, MPI_COMM_WORLD);

        exit(EXIT_FAILURE);
    }

    MPI_Group_free(&world_grp);
    MPI_Finalize();
    return 0;
}
1 change: 1 addition & 0 deletions test/mpi/impls/mpich/ulfm/testlist
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
get_failed 3 mpiexecarg=-disable-auto-cleanup resultTest=TestStatusNoErrors timeLimit=10

0 comments on commit 8231dfd

Please sign in to comment.