Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Redesign the command line system to provide isolation #1176

Merged
merged 8 commits into from
Feb 5, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 12 additions & 7 deletions src/hwloc/hwloc-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ typedef uint16_t prte_binding_policy_t;
/* bind each rank to the cpu in the given
* cpu list based on its node-local-rank */
#define PRTE_BIND_ORDERED 0x8000
// overload policy was given
#define PRTE_BIND_OVERLOAD_GIVEN 0x0100

/* binding policies - any changes in these
* values must be reflected in prte/mca/rmaps/rmaps.h
Expand All @@ -165,14 +167,14 @@ typedef uint16_t prte_binding_policy_t;
#define PRTE_BIND_TO_L1CACHE 6
#define PRTE_BIND_TO_CORE 7
#define PRTE_BIND_TO_HWTHREAD 8
#define PRTE_GET_BINDING_POLICY(pol) ((pol) &0x0fff)
#define PRTE_GET_BINDING_POLICY(pol) ((pol) &0x00ff)
#define PRTE_SET_BINDING_POLICY(target, pol) \
(target) = (pol) | (((target) &0x2000) | PRTE_BIND_GIVEN)
#define PRTE_SET_DEFAULT_BINDING_POLICY(target, pol) \
do { \
if (!PRTE_BINDING_POLICY_IS_SET((target))) { \
(target) = (pol) | (((target) &0xf000) | PRTE_BIND_IF_SUPPORTED); \
} \
(target) = (pol) | (((target) & 0xff00) | PRTE_BIND_GIVEN)
#define PRTE_SET_DEFAULT_BINDING_POLICY(target, pol) \
do { \
if (!PRTE_BINDING_POLICY_IS_SET((target))) { \
(target) = (pol) | (((target) & 0xff00) | PRTE_BIND_IF_SUPPORTED); \
} \
} while (0);

/* check if policy is set */
Expand All @@ -181,6 +183,7 @@ typedef uint16_t prte_binding_policy_t;
#define PRTE_BINDING_REQUIRED(n) (!(PRTE_BIND_IF_SUPPORTED & (n)))
/* macros to detect if overload is allowed and if an overload policy was given */
#define PRTE_BIND_OVERLOAD_ALLOWED(n) (PRTE_BIND_ALLOW_OVERLOAD & (n))
#define PRTE_BIND_OVERLOAD_SET(n) (PRTE_BIND_OVERLOAD_GIVEN & (n))
#define PRTE_BIND_ORDERED_REQUESTED(n) (PRTE_BIND_ORDERED & (n))

/* some global values */
Expand Down Expand Up @@ -257,6 +260,8 @@ PRTE_EXPORT prte_hwloc_locality_t prte_hwloc_base_get_relative_locality(hwloc_to
char *cpuset1,
char *cpuset2);

PRTE_EXPORT int prte_hwloc_base_set_default_binding(void *jdata,
void *options);
PRTE_EXPORT int prte_hwloc_base_set_binding_policy(void *jdata, char *spec);

/**
Expand Down
143 changes: 141 additions & 2 deletions src/hwloc/hwloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2013-2020 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -18,6 +18,7 @@
#include "src/mca/base/base.h"
#include "src/mca/mca.h"
#include "src/mca/rmaps/rmaps_types.h"
#include "src/mca/schizo/schizo.h"
#include "src/runtime/prte_globals.h"
#include "src/threads/tsd.h"
#include "src/util/argv.h"
Expand Down Expand Up @@ -301,6 +302,141 @@ void prte_hwloc_base_close(void)
prte_hwloc_base_inited = false;
}

int prte_hwloc_base_set_default_binding(void *jd, void *opt)
{
prte_job_t *jdata = (prte_job_t*)jd;
prte_schizo_options_t *options = (prte_schizo_options_t*)opt;
prte_mapping_policy_t mpol;

if (prte_get_attribute(&jdata->attributes, PRTE_JOB_PES_PER_PROC, NULL, PMIX_UINT16)) {
/* bind to cpus */
if (options->use_hwthreads) {
/* if we are using hwthread cpus, then bind to those */
prte_output_verbose(options->verbosity, options->stream,
"setdefaultbinding[%d] binding not given - using byhwthread",
__LINE__);
PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_HWTHREAD);
} else {
/* bind to core */
prte_output_verbose(options->verbosity, options->stream,
"setdefaultbinding[%d] binding not given - using bycore", __LINE__);
PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_CORE);
}
} else {
/* if the user explicitly mapped-by some object, then we default
* to binding to that object */
mpol = PRTE_GET_MAPPING_POLICY(jdata->map->mapping);
if (PRTE_MAPPING_GIVEN & PRTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
if (PRTE_MAPPING_BYHWTHREAD == mpol) {
prte_output_verbose(options->verbosity, options->stream,
"setdefaultbinding[%d] binding not given - using byhwthread", __LINE__);
PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_HWTHREAD);
} else if (PRTE_MAPPING_BYCORE == mpol) {
prte_output_verbose(options->verbosity, options->stream,
"setdefaultbinding[%d] binding not given - using bycore", __LINE__);
PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_CORE);
} else if (PRTE_MAPPING_BYL1CACHE == mpol) {
prte_output_verbose(options->verbosity, options->stream,
"setdefaultbinding[%d] binding not given - using byL1", __LINE__);
PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_L1CACHE);
} else if (PRTE_MAPPING_BYL2CACHE == mpol) {
prte_output_verbose(options->verbosity, options->stream,
"setdefaultbinding[%d] binding not given - using byL2", __LINE__);
PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_L2CACHE);
} else if (PRTE_MAPPING_BYL3CACHE == mpol) {
prte_output_verbose(options->verbosity, options->stream,
"setdefaultbinding[%d] binding not given - using byL3", __LINE__);
PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_L3CACHE);
} else if (PRTE_MAPPING_BYNUMA == mpol) {
prte_output_verbose(options->verbosity, options->stream,
"setdefaultbinding[%d] binding not given - using bynuma",
__LINE__);
PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_NUMA);
} else if (PRTE_MAPPING_BYPACKAGE == mpol) {
prte_output_verbose(options->verbosity, options->stream,
"setdefaultbinding[%d] binding not given - using bypackage", __LINE__);
PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_PACKAGE);
} else {
/* we are mapping by node or some other non-object method */
if (options->nprocs <= 2) {
if (options->use_hwthreads) {
/* if we are using hwthread cpus, then bind to those */
prte_output_verbose(options->verbosity, options->stream,
"setdefaultbinding[%d] binding not given - using byhwthread", __LINE__);
PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding,
PRTE_BIND_TO_HWTHREAD);
} else {
/* for performance, bind to core */
prte_output_verbose(options->verbosity, options->stream,
"setdefaultbinding[%d] binding not given - using bycore", __LINE__);
PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding,
PRTE_BIND_TO_CORE);
}
} else {
/* bind to numa (if present), or by package (if numa isn't present and package is) */
if (NULL != hwloc_get_obj_by_type(prte_hwloc_topology, HWLOC_OBJ_NUMANODE, 0)) {
prte_output_verbose(options->verbosity, options->stream,
"setdefaultbinding[%d] binding not given - using bynuma", __LINE__);
PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_NUMA);
} else if (NULL != hwloc_get_obj_by_type(prte_hwloc_topology, HWLOC_OBJ_PACKAGE, 0)) {
prte_output_verbose(options->verbosity, options->stream,
"setdefaultbinding[%d] binding not given - using bypackage", __LINE__);
PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_PACKAGE);
} else {
/* if we have neither, then just don't bind */
prte_output_verbose(options->verbosity, options->stream,
"setdefaultbinding[%d] binding not given and no NUMA "
"or packages - not binding",
__LINE__);
PRTE_SET_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_NONE);
}
}
}
} else if (options->nprocs <= 2) {
if (options->use_hwthreads) {
/* if we are using hwthread cpus, then bind to those */
prte_output_verbose(options->verbosity, options->stream,
"setdefaultbinding[%d] binding not given - using byhwthread",
__LINE__);
PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_HWTHREAD);
} else {
/* for performance, bind to core */
prte_output_verbose(options->verbosity, options->stream,
"setdefaultbinding[%d] binding not given - using bycore",
__LINE__);
PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_CORE);
}
} else {
/* for performance, bind to numa, if available, else try package */
if (NULL != hwloc_get_obj_by_type(prte_hwloc_topology, HWLOC_OBJ_NUMANODE, 0)) {
prte_output_verbose(options->verbosity, options->stream,
"setdefaultbinding[%d] binding not given - using bynuma",
__LINE__);
PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_NUMA);
} else if (NULL != hwloc_get_obj_by_type(prte_hwloc_topology, HWLOC_OBJ_PACKAGE, 0)) {
prte_output_verbose(options->verbosity, options->stream,
"setdefaultbinding[%d] binding not given - using bypackage",
__LINE__);
PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_PACKAGE);
} else {
/* just don't bind */
prte_output_verbose(options->verbosity, options->stream,
"setdefaultbinding[%d] binding not given and no packages - not binding",
__LINE__);
PRTE_SET_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_NONE);
}
}
}
/* they might have set the overload-allowed flag while wanting PRRTE
* to set the default binding - don't override it */
if (!PRTE_BIND_OVERLOAD_SET(jdata->map->binding)) {
if (PRTE_BIND_OVERLOAD_ALLOWED(prte_hwloc_default_binding_policy)) {
jdata->map->binding |= PRTE_BIND_ALLOW_OVERLOAD;
}
}
return PRTE_SUCCESS;
}

static bool fns_init = false;
static prte_tsd_key_t print_tsd_key;
char *prte_hwloc_print_null = "NULL";
Expand Down Expand Up @@ -507,7 +643,10 @@ int prte_hwloc_base_set_binding_policy(void *jdat, char *spec)
if (0 == strcasecmp(quals[i], "if-supported")) {
tmp |= PRTE_BIND_IF_SUPPORTED;
} else if (0 == strcasecmp(quals[i], "overload-allowed")) {
tmp |= PRTE_BIND_ALLOW_OVERLOAD;
tmp |= (PRTE_BIND_ALLOW_OVERLOAD | PRTE_BIND_OVERLOAD_GIVEN);
} else if (0 == strcasecmp(quals[i], "no-overload")) {
tmp = (tmp & ~PRTE_BIND_ALLOW_OVERLOAD);
tmp |= PRTE_BIND_OVERLOAD_GIVEN;
} else if (0 == strcasecmp(quals[i], "ordered")) {
tmp |= PRTE_BIND_ORDERED;
} else if (0 == strcasecmp(quals[i], "REPORT")) {
Expand Down
12 changes: 11 additions & 1 deletion src/mca/ess/base/ess_base_std_prted.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* Copyright (c) 2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -64,6 +64,7 @@
#include "src/mca/routed/base/base.h"
#include "src/mca/routed/routed.h"
#include "src/mca/rtc/base/base.h"
#include "src/mca/schizo/base/base.h"
#include "src/mca/state/base/base.h"
#include "src/mca/state/state.h"
#include "src/prted/pmix/pmix_server.h"
Expand Down Expand Up @@ -288,6 +289,15 @@ int prte_ess_base_prted_setup(void)
jdata = PRTE_NEW(prte_job_t);
PMIX_LOAD_NSPACE(jdata->nspace, PRTE_PROC_MY_NAME->nspace);
prte_set_job_data_object(jdata);
/* set the schizo personality to "prte" by default */
jdata->schizo = (struct prte_schizo_base_module_t*)prte_schizo_base_detect_proxy("prte");
if (NULL == jdata->schizo) {
prte_show_help("help-schizo-base.txt", "no-proxy", true, prte_tool_basename, "prte");
error = "select personality";
ret = PRTE_ERR_SILENT;
goto error;
}

/* every job requires at least one app */
app = PRTE_NEW(prte_app_context_t);
prte_pointer_array_set_item(jdata->apps, 0, app);
Expand Down
12 changes: 11 additions & 1 deletion src/mca/ess/hnp/ess_hnp_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* Copyright (c) 2013-2020 Intel, Inc. All rights reserved.
* Copyright (c) 2017-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -73,6 +73,7 @@
#include "src/mca/routed/base/base.h"
#include "src/mca/routed/routed.h"
#include "src/mca/rtc/base/base.h"
#include "src/mca/schizo/base/base.h"
#include "src/mca/state/base/base.h"
#include "src/mca/state/state.h"

Expand Down Expand Up @@ -315,6 +316,15 @@ static int rte_init(int argc, char **argv)
PMIX_LOAD_NSPACE(jdata->nspace, PRTE_PROC_MY_NAME->nspace);
prte_set_job_data_object(jdata);

/* set the schizo personality to "prte" by default */
jdata->schizo = (struct prte_schizo_base_module_t*)prte_schizo_base_detect_proxy("prte");
if (NULL == jdata->schizo) {
prte_show_help("help-schizo-base.txt", "no-proxy", true, prte_tool_basename, "prte");
error = "select personality";
ret = PRTE_ERR_SILENT;
goto error;
}

/* mark that the daemons have reported as we are the
* only ones in the system right now, and we definitely
* are running!
Expand Down
Loading