Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix tree spawn routed component issue #6944

Merged
merged 1 commit into from
Sep 9, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion orte/mca/plm/rsh/plm_rsh_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -343,11 +343,12 @@ static int setup_launch(int *argcptr, char ***argvptr,
char *orted_cmd, *orted_prefix, *final_cmd;
int orted_index;
int rc;
int i, j;
int i, j, cnt;
bool found;
char *lib_base=NULL, *bin_base=NULL;
char *opal_prefix = getenv("OPAL_PREFIX");
char* full_orted_cmd = NULL;
char * rtmod;

/* Figure out the basenames for the libdir and bindir. This
requires some explanation:
Expand Down Expand Up @@ -609,6 +610,18 @@ static int setup_launch(int *argcptr, char ***argvptr,
(mca_plm_rsh_component.using_llspawn && mca_plm_rsh_component.daemonize_llspawn))) {
}

if (!mca_plm_rsh_component.no_tree_spawn) {
// Remove problematic and/or conflicting command line arguments that
// should not be passed on to our children.
cnt = opal_argv_count(orted_cmd_line);
for (i=0; i < cnt; i+=3) {
if (0 == strcmp(orted_cmd_line[i+1], "routed")) {
opal_argv_delete(&cnt, &orted_cmd_line, i, 3);
break;
}
}
}

/*
* Add the basic arguments to the orted command line, including
* all debug options
Expand All @@ -627,6 +640,16 @@ static int setup_launch(int *argcptr, char ***argvptr,
if (!mca_plm_rsh_component.no_tree_spawn) {
opal_argv_append(&argc, &argv, "--tree-spawn");
orte_oob_base_get_addr(&param);

// When tree spawning, we need to force the remote daemons to use
// the routing component that was used to set up the launch tree.
// Otherwise the orte_parent_uri will not match the orted they
// expect to find in the routing tree.
rtmod = orte_rml.get_routed(orte_coll_conduit);
opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID);
opal_argv_append(&argc, &argv, "routed");
opal_argv_append(&argc, &argv, rtmod);

opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID);
opal_argv_append(&argc, &argv, "orte_parent_uri");
opal_argv_append(&argc, &argv, param);
Expand Down Expand Up @@ -1187,6 +1210,10 @@ static void launch_daemons(int fd, short args, void *cbdata)
OBJ_CONSTRUCT(&coll, opal_list_t);
rtmod = orte_rml.get_routed(orte_coll_conduit);
orte_routed.get_routing_list(rtmod, &coll);

OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
"%s plm:rsh:launch Tree Launch using routed/%s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rtmod));
}

/* setup the launch */
Expand Down