Skip to content

Commit

Permalink
Make sure the gather is called in all cases, and not
Browse files Browse the repository at this point in the history
simply based on some local state. This is the second
part of the patch proposed for open-mpi#1183.

Signed-off-by: George Bosilca <[email protected]>
  • Loading branch information
bosilca committed Jul 26, 2017
1 parent 9d3dcc9 commit fbe6c22
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 84 deletions.
145 changes: 64 additions & 81 deletions ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
OPAL_MODEX_RECV_VALUE(err, OPAL_PMIX_NODEID, &(proc->super.proc_name), &pval, OPAL_UINT32);
if( OPAL_SUCCESS != err ) {
opal_output(0, "Unable to extract peer %s nodeid from the modex.\n",
OMPI_NAME_PRINT(&(proc->super.proc_name)));
vpids[i] = colors[i] = -1;
OMPI_NAME_PRINT(&(proc->super)));
colors[i] = -1;
continue;
}
vpids[i] = colors[i] = (int)val;
Expand Down Expand Up @@ -396,38 +396,37 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
* If weights have been provided take them in account. Otherwise rely
* solely on HWLOC information.
*/
if(0 == rank) {
if( 0 == rank ) {

#ifdef __DEBUG__
fprintf(stderr,"========== Centralized Reordering ========= \n");

#endif
local_pattern = (double *)calloc(size*size,sizeof(double));
if( true == topo->weighted ) {
for(i = 0; i < topo->indegree ; i++)
local_pattern[topo->in[i]] += topo->inw[i];
for(i = 0; i < topo->outdegree ; i++)
local_pattern[topo->out[i]] += topo->outw[i];
if (OMPI_SUCCESS != (err = comm_old->c_coll->coll_gather(MPI_IN_PLACE, size, MPI_DOUBLE,
local_pattern, size, MPI_DOUBLE,
0, comm_old,
comm_old->c_coll->coll_gather_module)))
return err;
}
} else {
local_pattern = (double *)calloc(size,sizeof(double));
if( true == topo->weighted ) {
for(i = 0; i < topo->indegree ; i++)
local_pattern[topo->in[i]] += topo->inw[i];
for(i = 0; i < topo->outdegree ; i++)
local_pattern[topo->out[i]] += topo->outw[i];
if (OMPI_SUCCESS != (err = comm_old->c_coll->coll_gather(local_pattern, size, MPI_DOUBLE,
NULL,0,0,
0, comm_old,
comm_old->c_coll->coll_gather_module)))
return err;
}
}
if( true == topo->weighted ) {
for(i = 0; i < topo->indegree ; i++)
local_pattern[topo->in[i]] += topo->inw[i];
for(i = 0; i < topo->outdegree ; i++)
local_pattern[topo->out[i]] += topo->outw[i];
}
if(0 == rank) {
err = comm_old->c_coll->coll_gather(MPI_IN_PLACE, size, MPI_DOUBLE,
local_pattern, size, MPI_DOUBLE,
0, comm_old,
comm_old->c_coll->coll_gather_module);
} else {
err = comm_old->c_coll->coll_gather(local_pattern, size, MPI_DOUBLE,
NULL,0,0,
0, comm_old,
comm_old->c_coll->coll_gather_module);
}
if (OMPI_SUCCESS != err) {
return err;
}

if( rank == local_procs[0]) {
if( rank == local_procs[0] ) {
tm_topology_t *tm_topology = NULL;
tm_topology_t *tm_opt_topology = NULL;
int *obj_to_rank_in_comm = NULL;
Expand Down Expand Up @@ -708,7 +707,9 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
char set_as_string[64];
opal_value_t kv;

if (OMPI_SUCCESS != (err = ompi_comm_split(comm_old,colors[rank],ompi_process_info.my_local_rank,&localcomm, false)))
if (OMPI_SUCCESS != (err = ompi_comm_split(comm_old, colors[rank],
ompi_process_info.my_local_rank,
&localcomm, false)))
return err;

for(i = 0 ; i < num_procs_in_node ; i++)
Expand All @@ -718,64 +719,64 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
for(i = 0 ; i < size ; i++)
grank_to_lrank[i] = -1;

if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_allgather(&rank,1,MPI_INT,
lrank_to_grank,1,MPI_INT,
localcomm,
localcomm->c_coll->coll_allgather_module)))
if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_allgather(&rank, 1, MPI_INT,
lrank_to_grank, 1, MPI_INT,
localcomm,
localcomm->c_coll->coll_allgather_module)))
return err;

for(i = 0 ; i < num_procs_in_node ; i++)
grank_to_lrank[lrank_to_grank[i]] = i;

if (rank == local_procs[0]){
tm_topology_t *tm_topology = NULL;
tm_topology_t *tm_opt_topology = NULL;
tree_t *comm_tree = NULL;
double **comm_pattern = NULL;

/* Discover the local patterns */
if (rank == local_procs[0]) {
#ifdef __DEBUG__
fprintf(stderr,"========== Partially Distributed Reordering ========= \n");
#endif
local_pattern = (double *)calloc(num_procs_in_node * num_procs_in_node,sizeof(double));
} else {
local_pattern = (double *)calloc(num_procs_in_node,sizeof(double));
}
for(i = 0; i < topo->indegree ; i++)
if (grank_to_lrank[topo->in[i]] != -1)
local_pattern[grank_to_lrank[topo->in[i]]] += topo->inw[i];
for(i = 0; i < topo->outdegree ; i++)
if (grank_to_lrank[topo->out[i]] != -1)
local_pattern[grank_to_lrank[topo->out[i]]] += topo->outw[i];
if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_gather((rank == local_procs[0] ? MPI_IN_PLACE : local_pattern),
num_procs_in_node, MPI_DOUBLE,
local_pattern, num_procs_in_node, MPI_DOUBLE,
0, localcomm,
localcomm->c_coll->coll_gather_module)))
ERR_EXIT(err);

local_pattern = (double *)calloc(num_procs_in_node*num_procs_in_node,sizeof(double));
for(i = 0 ; i < num_procs_in_node*num_procs_in_node ; i++)
local_pattern[i] = 0.0;

if( true == topo->weighted ) {
for(i = 0; i < topo->indegree ; i++)
if (grank_to_lrank[topo->in[i]] != -1)
local_pattern[grank_to_lrank[topo->in[i]]] += topo->inw[i];
for(i = 0; i < topo->outdegree ; i++)
if (grank_to_lrank[topo->out[i]] != -1)
local_pattern[grank_to_lrank[topo->out[i]]] += topo->outw[i];
if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_gather(MPI_IN_PLACE, num_procs_in_node, MPI_DOUBLE,
local_pattern, num_procs_in_node, MPI_DOUBLE,
0,localcomm,
localcomm->c_coll->coll_gather_module)))
ERR_EXIT(err);
}
/* The root has now the entire information, so let's crunch it */
if (rank == local_procs[0]) {
tm_topology_t *tm_topology = NULL;
tm_topology_t *tm_opt_topology = NULL;
tree_t *comm_tree = NULL;
double **comm_pattern = NULL;

comm_pattern = (double **)malloc(num_procs_in_node*sizeof(double *));
for(i = 0 ; i < num_procs_in_node ; i++){
comm_pattern[i] = (double *)calloc(num_procs_in_node,sizeof(double));
memcpy((void *)comm_pattern[i],(void *)(local_pattern + i*num_procs_in_node),num_procs_in_node*sizeof(double));
comm_pattern[i] = (double *)calloc(num_procs_in_node, sizeof(double));
memcpy((void *)comm_pattern[i],
(void *)(local_pattern + i*num_procs_in_node),
num_procs_in_node*sizeof(double));
}
/* Matrix needs to be symmetric */
for( i = 0 ; i < num_procs_in_node ; i++)
for(j = i ; j < num_procs_in_node ; j++){
comm_pattern[i][j] += comm_pattern[j][i];
comm_pattern[j][i] = comm_pattern[i][j];
comm_pattern[i][j] = (comm_pattern[i][j] + comm_pattern[j][i]) / 2;
comm_pattern[j][i] = comm_pattern[i][j];
}
for( i = 0 ; i < num_procs_in_node ; i++)
for(j = 0 ; j < num_procs_in_node ; j++)
comm_pattern[i][j] /= 2;

#ifdef __DEBUG__
fprintf(stdout,"========== COMM PATTERN ============= \n");
for(i = 0 ; i < num_procs_in_node ; i++){
fprintf(stdout," %i : ",i);
for(j = 0; j < num_procs_in_node ; j++)
fprintf(stdout," %f ",comm_pattern[i][j]);
fprintf(stdout," %f ", comm_pattern[i][j]);
fprintf(stdout,"\n");
}
fprintf(stdout,"======================= \n");
Expand Down Expand Up @@ -830,24 +831,6 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
free(tm_topology->arity);
free(tm_topology);
FREE_topology(tm_opt_topology);
} else {
local_pattern = (double *)calloc(num_procs_in_node,sizeof(double));
for(i = 0 ; i < num_procs_in_node ; i++)
local_pattern[i] = 0.0;

if( true == topo->weighted ) {
for(i = 0; i < topo->indegree ; i++)
if (grank_to_lrank[topo->in[i]] != -1)
local_pattern[grank_to_lrank[topo->in[i]]] += topo->inw[i];
for(i = 0; i < topo->outdegree ; i++)
if (grank_to_lrank[topo->out[i]] != -1)
local_pattern[grank_to_lrank[topo->out[i]]] += topo->outw[i];
if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_gather(local_pattern, num_procs_in_node, MPI_DOUBLE,
NULL,0,0,
0,localcomm,
localcomm->c_coll->coll_gather_module)))
ERR_EXIT(err);
}
}

if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_bcast(matching, num_procs_in_node,
Expand Down
3 changes: 1 addition & 2 deletions ompi/mca/topo/treematch/treematch/tm_kpartitioning.c
Original file line number Diff line number Diff line change
Expand Up @@ -427,8 +427,7 @@ tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **com
verbose_level = get_verbose_level();

if(verbose_level>=INFO)
printf("Number of constraints: %d\n", nb_constraints);
printf("Number of constraints: %d, N=%d\n", nb_constraints, N);
printf("Number of constraints: %d, N=%d\n", nb_constraints, N);

nb_cores=nb_processing_units(topology);

Expand Down
3 changes: 2 additions & 1 deletion ompi/mca/topo/treematch/treematch/tm_tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -1614,7 +1614,8 @@ tree_t * build_tree_from_topology(tm_topology_t *topology, double **com_mat, int

nb_constraints = check_constraints (topology, &constraints);

printf("nb_constraints = %d, N= %d; nb_processing units = %d\n",nb_constraints, N, nb_processing_units(topology));
if(verbose_level>=INFO)
printf("nb_constraints = %d, N= %d; nb_processing units = %d\n",nb_constraints, N, nb_processing_units(topology));

if(N>nb_constraints){
if(verbose_level >= CRITICAL){
Expand Down

0 comments on commit fbe6c22

Please sign in to comment.