Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MPI-PR: check for number of open files #310

Merged
merged 5 commits into from
Jul 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/github_actions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ jobs:
armci_network: mpi-pr
f77: ifort
cc: icc
config_opts: LIBS=-lifcore
oneapi: /Users/runner/apps/oneapi
- os: ubuntu-20.04
experimental: true
Expand Down Expand Up @@ -116,6 +117,7 @@ jobs:
armci_network: mpi-ts
f77: ifort
cc: gcc
oneapi: /opt/intel/oneapi
exclude:
- armci_network: mpi-pr
mpi_impl: openmpi
Expand Down
29 changes: 24 additions & 5 deletions comex/src-mpi-pr/comex.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,9 @@ static char *static_server_buffer = NULL;
static int static_server_buffer_size = 0;
static int eager_threshold = -1;
static int max_message_size = -1;
#if ENABLE_SYSV
static int use_dev_shm = 1;
#endif
static int token_counter = 0;
static int init_from_comm = 0;

Expand Down Expand Up @@ -375,6 +377,7 @@ STATIC void check_devshm(int fd, size_t size);
static int devshm_initialized = 0;
static long devshm_fs_left = 0;
static long devshm_fs_initial = 0;
STATIC void count_open_fds(void);

int _comex_init(MPI_Comm comm)
{
Expand Down Expand Up @@ -7561,13 +7564,9 @@ STATIC void check_devshm(int fd, size_t size){
g_state.rank, g_state.node_size, devshm_fs_initial/CONVERT_TO_M, (long) ufs_statfs.f_bsize, (long) g_state.node_size);
#endif
}
// if (size > 0) {
count_open_fds();
newspace = (long) ( size*(g_state.node_size -1));
// }else{
// newspace = (long) ( size);
// }
if(newspace>0){
// no fd for space<0
fstatfs(fd, &ufs_statfs);
#ifdef DEBUGSHM
fprintf(stderr, "[%d] /dev/shm filesize %ld filesize*np %ld initial devshm space %ld current /dev/shm space %ld \n",
Expand All @@ -7594,3 +7593,23 @@ STATIC void check_devshm(int fd, size_t size){
#endif
#endif
}

/*
 * Check how many file handles are currently allocated and raise a fatal
 * error through comex_error() once usage exceeds 60% of the maximum.
 *
 * The numbers come from /proc/sys/fs/file-nr, which holds three integers:
 * allocated file handles, allocated-but-unused handles, and the maximum
 * number of file handles. These are system-wide figures, not per-process.
 *
 * This is a best-effort check: if the file cannot be opened (for example
 * on a platform without procfs) or cannot be parsed, the function returns
 * without reporting anything instead of dereferencing a NULL stream or
 * comparing uninitialized values.
 */
STATIC void count_open_fds(void) {
    long nfiles = 0;
    long unused = 0;
    long maxfiles = 0;
    int nread;
    FILE *f = fopen("/proc/sys/fs/file-nr", "r");

    if (f == NULL) {
        /* no procfs entry available: nothing to check */
        return;
    }

    nread = fscanf(f, "%ld %ld %ld", &nfiles, &unused, &maxfiles);
    /* close before any error path so the stream is never left open */
    fclose(f);
    if (nread != 3) {
        /* unexpected file format: skip the check rather than guess */
        return;
    }

#ifdef DEBUGSHM
    if(nfiles % 1000 == 0) fprintf(stderr," %d: no. open files = %ld maxfiles = %ld\n", g_state.rank, nfiles, maxfiles);
#endif
    /* threshold is 60% of the maximum; divide first to avoid overflow */
    if (nfiles > (maxfiles / 100) * 60) {
        printf(" %d: running out of files; files = %ld maxfiles = %ld\n", g_state.rank, nfiles, maxfiles);
#if PAUSE_ON_ERROR
        /* print the pid and block so a debugger can be attached */
        fprintf(stderr,"%d(%d): too many open files\n",
                g_state.rank, getpid());
        pause();
#endif
        comex_error("count_open_fds: too many open files", -1);
    }
}