Skip to content

Commit

Permalink
Minor cleanups
Browse files Browse the repository at this point in the history
If we didn't find any psets when queried about them, that isn't
a "not found" error - just return zero for the number and NULL
for the list of names.

Ensure we remove any pset names once the job containing those
names terminates - the pset name doesn't persist beyond the
lifetime of the job.

Signed-off-by: Ralph Castain <[email protected]>
  • Loading branch information
rhc54 committed Feb 5, 2025
1 parent 43dce07 commit b175907
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 14 deletions.
10 changes: 9 additions & 1 deletion src/mca/state/base/state_base_fns.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* and Technology (RIST). All rights reserved.
* Copyright (c) 2020 IBM Corporation. All rights reserved.
* Copyright (c) 2020 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2021-2024 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -598,6 +598,7 @@ void prte_state_base_check_all_complete(int fd, short args, void *cbdata)
int32_t i32, *i32ptr;
prte_pmix_lock_t lock;
prte_app_context_t *app;
pmix_server_pset_t *pst, *pst2;
PRTE_HIDE_UNUSED_PARAMS(fd, args);

PMIX_ACQUIRE_OBJECT(caddy);
Expand Down Expand Up @@ -750,6 +751,13 @@ void prte_state_base_check_all_complete(int fd, short args, void *cbdata)
PMIX_RELEASE(map);
jdata->map = NULL;
}
// if this job has apps that named a pset, then remove them
PMIX_LIST_FOREACH_SAFE(pst, pst2, &prte_pmix_server_globals.psets, pmix_server_pset_t) {
if (pst->jdata == jdata) {
pmix_list_remove_item(&prte_pmix_server_globals.psets, &pst->super);
PMIX_RELEASE(pst);
}
}

CHECK_ALIVE:
/* now check to see if all jobs are done - trigger notification of this jdata
Expand Down
10 changes: 9 additions & 1 deletion src/mca/state/dvm/state_dvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* and Technology (RIST). All rights reserved.
* Copyright (c) 2020 IBM Corporation. All rights reserved.
* Copyright (c) 2020 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2021-2024 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -516,6 +516,7 @@ static void check_complete(int fd, short args, void *cbdata)
hwloc_obj_type_t type;
hwloc_cpuset_t boundcpus, tgt;
bool takeall, sep, *sepptr = &sep;
pmix_server_pset_t *pst, *pst2;
PRTE_HIDE_UNUSED_PARAMS(fd, args);

PMIX_ACQUIRE_OBJECT(caddy);
Expand Down Expand Up @@ -812,6 +813,13 @@ static void check_complete(int fd, short args, void *cbdata)
PMIX_RELEASE(map);
jdata->map = NULL;
}
// if this job has apps that named a pset, then remove them
PMIX_LIST_FOREACH_SAFE(pst, pst2, &prte_pmix_server_globals.psets, pmix_server_pset_t) {
if (pst->jdata == jdata) {
pmix_list_remove_item(&prte_pmix_server_globals.psets, &pst->super);
PMIX_RELEASE(pst);
}
}

/* if requested, check fd status for leaks */
if (prte_state_base.run_fdcheck) {
Expand Down
6 changes: 5 additions & 1 deletion src/prted/pmix/pmix_server.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
* All rights reserved.
* Copyright (c) 2014-2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2021-2024 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
* Copyright (c) 2023 Triad National Security, LLC. All rights reserved.
* $COPYRIGHT$
*
Expand Down Expand Up @@ -2066,6 +2066,7 @@ PMIX_CLASS_INSTANCE(pmix_server_req_t,
/* Initializer for a pmix_server_pset_t (presumably registered as the class
 * constructor - confirm against the PMIX_CLASS_INSTANCE declaration).
 * Every field starts out empty so that psdes() can test each pointer
 * against NULL before releasing it. */
static void pscon(pmix_server_pset_t *p)
{
    /* no membership recorded yet */
    p->num_members = 0;
    p->members = NULL;
    /* not yet associated with a job, and not yet named */
    p->jdata = NULL;
    p->name = NULL;
}
Expand All @@ -2074,6 +2075,9 @@ static void psdes(pmix_server_pset_t *p)
if (NULL != p->name) {
free(p->name);
}
if (NULL != p->jdata) {
PMIX_RELEASE(p->jdata);
}
if (NULL != p->members) {
free(p->members);
}
Expand Down
3 changes: 2 additions & 1 deletion src/prted/pmix/pmix_server_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2020 IBM Corporation. All rights reserved.
* Copyright (c) 2021-2024 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -354,6 +354,7 @@ pmix_server_session_ctrl_fn(const pmix_proc_t *requestor,
/* Tracking object for one named process set (pset) known to the server.
 * Instances are kept on the prte_pmix_server_globals.psets list. */
typedef struct {
/* list linkage - allows the object to be appended to a pmix_list_t */
pmix_list_item_t super;
/* heap-allocated pset name; freed when the object is destructed */
char *name;
/* job that defined this pset; a reference is retained when the pset is
 * registered and released when the object is destructed. Used to find
 * and remove the pset once that job completes. */
prte_job_t *jdata;
/* heap-allocated array of member procs; freed when the object is destructed */
pmix_proc_t *members;
/* presumably the number of entries in members - TODO confirm against the
 * code that fills in the membership */
size_t num_members;
} pmix_server_pset_t;
Expand Down
22 changes: 13 additions & 9 deletions src/prted/pmix/pmix_server_queries.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
* Copyright (c) 2014-2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2020 IBM Corporation. All rights reserved.
* Copyright (c) 2021-2024 Nanook Consulting All rights reserved.
* Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
* Copyright (c) 2024 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
Expand Down Expand Up @@ -319,7 +319,10 @@ static void _query(int sd, short args, void *cbdata)
}
/* add our findings to the results */
PMIX_INFO_LIST_CONVERT(rc, cache, &dry);
if (PMIX_SUCCESS != rc) {
if (PMIX_SUCCESS != rc && PMIX_ERR_EMPTY != rc) {
// if the array is empty, then there is nothing wrong - we
// simply didn't find any running jobs
// otherwise, report the error and abort
PMIX_ERROR_LOG(rc);
PMIX_INFO_LIST_RELEASE(cache);
goto done;
Expand Down Expand Up @@ -587,18 +590,19 @@ static void _query(int sd, short args, void *cbdata)
PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ans, ps->name);
}
if (NULL == ans) {
ret = PMIX_ERR_NOT_FOUND;
goto done;
tmp = NULL;;
} else {
tmp = PMIX_ARGV_JOIN_COMPAT(ans, ',');
PMIX_ARGV_FREE_COMPAT(ans);
ans = NULL;
PMIX_INFO_LIST_ADD(rc, results, PMIX_QUERY_PSET_NAMES, tmp, PMIX_STRING);
}
PMIX_INFO_LIST_ADD(rc, results, PMIX_QUERY_PSET_NAMES, tmp, PMIX_STRING);
if (NULL != tmp) {
free(tmp);
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
goto done;
}
}
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
goto done;
}

} else if (0 == strcmp(q->keys[n], PMIX_QUERY_PSET_MEMBERSHIP)) {
Expand Down
4 changes: 3 additions & 1 deletion src/prted/pmix/pmix_server_register_fns.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
* Copyright (c) 2014-2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017-2020 IBM Corporation. All rights reserved.
* Copyright (c) 2021-2024 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
* Copyright (c) 2024 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
Expand Down Expand Up @@ -402,6 +402,8 @@ int prte_pmix_server_register_nspace(prte_job_t *jdata)
/* register it */
pset = PMIX_NEW(pmix_server_pset_t);
pset->name = strdup(tmp);
PMIX_RETAIN(jdata);
pset->jdata = jdata;
pmix_list_append(&prte_pmix_server_globals.psets, &pset->super);
free(tmp);
/* and its membership */
Expand Down

0 comments on commit b175907

Please sign in to comment.