Skip to content

Commit

Permalink
Fix second pass on map-by-obj
Browse files Browse the repository at this point in the history
Computation of balance in the second pass was using the wrong variable
in the denominator: total_nobjs instead of options->total_nobjs.

Signed-off-by: Ralph Castain <[email protected]>
  • Loading branch information
rhc54 committed Aug 23, 2022
1 parent 75511d4 commit 6ed6563
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 18 deletions.
2 changes: 1 addition & 1 deletion src/mca/rmaps/base/help-prte-rmaps-base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ slots that were requested by the application:

%s

Either request fewer slots for your application, or make more slots
Either request fewer procs for your application, or make more slots
available for use.

A "slot" is the PRRTE term for an allocatable unit where we can
Expand Down
53 changes: 36 additions & 17 deletions src/mca/rmaps/round_robin/rmaps_rr_mappers.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,14 @@ int prte_rmaps_rr_byslot(prte_job_t *jdata,
}
}

if (!options->oversubscribe) {
/* since oversubscribe is not allowed, cap our usage
* at the number of available slots. */
if (node->slots_available < options->nprocs) {
options->nprocs = node->slots_available;
}
}

/* if the number of procs is greater than the number of CPUs
* on this node, but less or equal to the number of slots,
* then we are not oversubscribed but we are overloaded. If
Expand Down Expand Up @@ -240,6 +248,14 @@ int prte_rmaps_rr_bynode(prte_job_t *jdata,
{
prte_rmaps_base_get_cpuset(jdata, node, options);

if (!options->oversubscribe) {
/* since oversubscribe is not allowed, cap our usage
* at the number of available slots. */
if (node->slots_available < options->nprocs) {
options->nprocs = node->slots_available;
}
}

/* if the number of procs is greater than the number of CPUs
* on this node, but less or equal to the number of slots,
* then we are not oversubscribed but we are overloaded. If
Expand All @@ -259,19 +275,6 @@ int prte_rmaps_rr_bynode(prte_job_t *jdata,
continue;
}

/* if oversubscribe is specified, then just ignore the
* number of slots on each node and assign this number.
* Note that oversubscribe automatically dictates that
* we do not bind, so binding can also be ignored */

if (!options->oversubscribe) {
/* since oversubscribe is not allowed , cap our usage
* at the number of available slots */
if (node->slots_available < options->nprocs) {
options->nprocs = node->slots_available;
}
}

PRTE_OUTPUT_VERBOSE((10, prte_rmaps_base_framework.framework_output,
"%s NODE %s ASSIGNING %d PROCS",
PRTE_NAME_PRINT(PRTE_PROC_MY_NAME),
Expand Down Expand Up @@ -374,6 +377,14 @@ int prte_rmaps_rr_bycpu(prte_job_t *jdata, prte_app_context_t *app,
}
}

if (!options->oversubscribe) {
/* oversubscribe is not allowed, so cap our usage
* at the number of available slots. */
if (node->slots_available < options->nprocs) {
options->nprocs = node->slots_available;
}
}

/* if the number of procs is greater than the number of CPUs
* on this node, but less or equal to the number of slots,
* then we are not oversubscribed but we are overloaded. If
Expand Down Expand Up @@ -576,8 +587,14 @@ int prte_rmaps_rr_byobj(prte_job_t *jdata, prte_app_context_t *app,
}
}
}
prte_output_verbose(2, prte_rmaps_base_framework.framework_output,
"mca:rmaps:rr: assigning nprocs %d", nprocs);

if (!options->oversubscribe) {
/* since oversubscribe is not allowed, cap our usage
* at the number of available slots. */
if (node->slots_available < nprocs) {
nprocs = node->slots_available;
}
}

/* if the number of procs is greater than the number of CPUs
* on this node, but less or equal to the number of slots,
Expand All @@ -592,6 +609,9 @@ int prte_rmaps_rr_byobj(prte_job_t *jdata, prte_app_context_t *app,
jdata->map->binding = PRTE_BIND_TO_NONE;
}

prte_output_verbose(2, prte_rmaps_base_framework.framework_output,
"mca:rmaps:rr: assigning nprocs %d", nprocs);

nodefull = false;
if (span) {
/* if we are mapping spanned, then we loop over
Expand Down Expand Up @@ -700,8 +720,7 @@ int prte_rmaps_rr_byobj(prte_job_t *jdata, prte_app_context_t *app,
* handling the oversubscription. Figure out how many procs
* to add to each of them.
*/
balance = (float) ((int) app->num_procs - nprocs_mapped)
/ (float) total_nobjs;
balance = (float) ((int) app->num_procs - nprocs_mapped) / (float) options->total_nobjs;
extra_procs_to_assign = (int) balance;
if (0 < (balance - (float) extra_procs_to_assign)) {
/* compute how many nodes need an extra proc */
Expand Down

0 comments on commit 6ed6563

Please sign in to comment.