Skip to content

Commit

Permalink
Merge pull request open-mpi#7 from rhc54/topic/update
Browse files Browse the repository at this point in the history
Sync with OMPI master
  • Loading branch information
Ralph Castain authored Sep 18, 2017
2 parents dfb36f7 + f2d9a9b commit 40ce261
Show file tree
Hide file tree
Showing 64 changed files with 4,046 additions and 2,387 deletions.
10 changes: 7 additions & 3 deletions config/opal_check_pmi.m4
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
# Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
# Copyright (c) 2014-2016 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# Copyright (c) 2016 IBM Corporation. All rights reserved.
Expand Down Expand Up @@ -240,9 +240,11 @@ AC_DEFUN([OPAL_CHECK_PMIX],[
AC_MSG_ERROR([Cannot continue])])
AC_MSG_CHECKING([if user requested external PMIx support($with_pmix)])
opal_prun_happy=no
AS_IF([test -z "$with_pmix" || test "$with_pmix" = "yes" || test "$with_pmix" = "internal"],
[AC_MSG_RESULT([no])
opal_external_pmix_happy=no],
opal_external_pmix_happy=no
opal_prun_happy=yes],
[AC_MSG_RESULT([yes])
# check for external pmix lib
Expand Down Expand Up @@ -295,7 +297,8 @@ AC_DEFUN([OPAL_CHECK_PMIX],[
], [])],
[AC_MSG_RESULT([found])
opal_external_pmix_version=2x
opal_external_pmix_version_found=1],
opal_external_pmix_version_found=1
opal_prun_happy=yes],
[AC_MSG_RESULT([not found])])])
AS_IF([test "$opal_external_pmix_version_found" = "0"],
Expand Down Expand Up @@ -326,5 +329,6 @@ AC_DEFUN([OPAL_CHECK_PMIX],[
opal_external_pmix_LIBS=-lpmix
opal_external_pmix_happy=yes])
AM_CONDITIONAL([OPAL_WANT_PRUN], [test "$opal_prun_happy" = "yes"])
OPAL_VAR_SCOPE_POP
])
2 changes: 1 addition & 1 deletion config/orte_config_files.m4
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# Corporation. All rights reserved.
# Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2015-2016 Intel, Inc. All rights reserved
# Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
Expand Down
5 changes: 2 additions & 3 deletions opal/dss/dss_pack.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
Expand Down Expand Up @@ -837,7 +837,7 @@ int opal_dss_pack_value(opal_buffer_t *buffer, const void *src,
}
break;
default:
opal_output(0, "PACK-OPAL-VALUE: UNSUPPORTED TYPE %d", (int)ptr[i]->type);
opal_output(0, "PACK-OPAL-VALUE: UNSUPPORTED TYPE %d FOR KEY %s", (int)ptr[i]->type, ptr[i]->key);
return OPAL_ERROR;
}
}
Expand Down Expand Up @@ -981,4 +981,3 @@ int opal_dss_pack_status(opal_buffer_t *buffer, const void *src,

return ret;
}

12 changes: 10 additions & 2 deletions opal/dss/dss_unpack.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2012-2015 Los Alamos National Security, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
Expand Down Expand Up @@ -1086,13 +1086,21 @@ int opal_dss_unpack_value(opal_buffer_t *buffer, void *dest,
return ret;
}
break;
case OPAL_PTR:
/* just ignore these values */
break;
case OPAL_NAME:
if (OPAL_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, &ptr[i]->data.name, &m, OPAL_NAME))) {
return ret;
}
break;
case OPAL_STATUS:
if (OPAL_SUCCESS != (ret = opal_dss_unpack_buffer(buffer, &ptr[i]->data.status, &m, OPAL_INT))) {
return ret;
}
break;
default:
opal_output(0, "PACK-OPAL-VALUE: UNSUPPORTED TYPE");
opal_output(0, "UNPACK-OPAL-VALUE: UNSUPPORTED TYPE %d FOR KEY %s", (int)ptr[i]->type, ptr[i]->key);
return OPAL_ERROR;
}
}
Expand Down
45 changes: 36 additions & 9 deletions opal/mca/btl/openib/connect/btl_openib_connect_udcm.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Bull SAS. All rights reserved.
* Copyright (c) 2016 Mellanox Technologies. All rights reserved.
*
Expand Down Expand Up @@ -100,6 +100,8 @@ typedef struct {
/** The port number of this port, also used to locate the source
endpoint when a UD CM request arrives */
uint8_t mm_port_num;
/** Global ID (needed when routers are in use) */
union ibv_gid mm_gid;
} modex_msg_t;

/*
Expand Down Expand Up @@ -738,9 +740,17 @@ static int udcm_module_init (udcm_module_t *m, mca_btl_openib_module_t *btl)
m->modex.mm_port_num = btl->port_num;
m->modex.mm_qp_num = m->listen_qp->qp_num;

BTL_VERBOSE(("my modex = LID: %d, Port: %d, QPN: %d",
m->modex.mm_lid, m->modex.mm_port_num,
m->modex.mm_qp_num));
rc = ibv_query_gid (btl->device->ib_dev_context, btl->port_num,
mca_btl_openib_component.gid_index, &m->modex.mm_gid);
if (0 != rc) {
BTL_VERBOSE(("error querying port GID"));
return OPAL_ERROR;
}

BTL_VERBOSE(("my modex = LID: %d, Port: %d, QPN: %d, GID: %08x %08x",
m->modex.mm_lid, m->modex.mm_port_num, m->modex.mm_qp_num,
m->modex.mm_gid.global.interface_id,
m->modex.mm_gid.global.subnet_prefix));

m->cpc.data.cbm_modex_message_len = sizeof(m->modex);

Expand Down Expand Up @@ -1528,6 +1538,7 @@ static int udcm_endpoint_init_data (mca_btl_base_endpoint_t *lcl_ep)
{
modex_msg_t *remote_msg = UDCM_ENDPOINT_REM_MODEX(lcl_ep);
udcm_endpoint_t *udep = UDCM_ENDPOINT_DATA(lcl_ep);
udcm_module_t *m = UDCM_ENDPOINT_MODULE(lcl_ep);
struct ibv_ah_attr ah_attr;
int rc = OPAL_SUCCESS;

Expand All @@ -1542,6 +1553,18 @@ static int udcm_endpoint_init_data (mca_btl_base_endpoint_t *lcl_ep)
ah_attr.port_num = remote_msg->mm_port_num;
ah_attr.sl = mca_btl_openib_component.ib_service_level;
ah_attr.src_path_bits = lcl_ep->endpoint_btl->src_path_bits;
if (0 != memcmp (&remote_msg->mm_gid, &m->modex.mm_gid, sizeof (m->modex.mm_gid))) {
ah_attr.is_global = 1;
ah_attr.grh.flow_label = 0;
ah_attr.grh.dgid = remote_msg->mm_gid;
ah_attr.grh.sgid_index = mca_btl_openib_component.gid_index;
/* NTH: probably won't need to go over more than a single router. changeme if this
* assumption is wrong. this value should never be <= 1 as it will not leave the
* subnet. */
ah_attr.grh.hop_limit = 2;
/* Seems reasonable to set this to 0 for connection messages. */
ah_attr.grh.traffic_class = 0;
}

udep->ah = ibv_create_ah (lcl_ep->endpoint_btl->device->ib_pd, &ah_attr);
if (!udep->ah) {
Expand Down Expand Up @@ -1957,6 +1980,7 @@ static int udcm_process_messages (struct ibv_cq *event_cq, udcm_module_t *m)
udcm_msg_t *message = NULL;
udcm_message_recv_t *item;
struct ibv_wc wc[20];
struct ibv_grh *grh;
udcm_endpoint_t *udep;
uint64_t dir;

Expand All @@ -1969,18 +1993,21 @@ static int udcm_process_messages (struct ibv_cq *event_cq, udcm_module_t *m)
for (i = 0 ; i < count ; i++) {
dir = wc[i].wr_id & UDCM_WR_DIR_MASK;

BTL_VERBOSE(("WC: wr_id: 0x%016" PRIu64 ", status: %d, opcode: 0x%x, byte_len: %x, imm_data: 0x%08x, "
"qp_num: 0x%08x, src_qp: 0x%08x, wc_flags: 0x%x, slid: 0x%04x",
wc[i].wr_id, wc[i].status, wc[i].opcode, wc[i].byte_len,
wc[i].imm_data, wc[i].qp_num, wc[i].src_qp, wc[i].wc_flags, wc[i].slid));

if (UDCM_WR_RECV_ID != dir) {
opal_output (0, "unknown packet");
continue;
}

msg_num = (int)(wc[i].wr_id & (~UDCM_WR_DIR_MASK));

grh = (wc[i].wc_flags & IBV_WC_GRH) ? (struct ibv_grh *) udcm_module_get_recv_buffer (m, msg_num, false) : NULL;

BTL_VERBOSE(("WC: wr_id: 0x%016" PRIu64 ", status: %d, opcode: 0x%x, byte_len: %x, imm_data: 0x%08x, "
"qp_num: 0x%08x, src_qp: 0x%08x, wc_flags: 0x%x, slid: 0x%04x grh_present: %s",
wc[i].wr_id, wc[i].status, wc[i].opcode, wc[i].byte_len,
wc[i].imm_data, wc[i].qp_num, wc[i].src_qp, wc[i].wc_flags, wc[i].slid,
grh ? "yes" : "no"));

if (IBV_WC_SUCCESS != wc[i].status) {
BTL_ERROR(("recv work request for buffer %d failed, code = %d",
msg_num, wc[i].status));
Expand Down
5 changes: 1 addition & 4 deletions opal/mca/btl/usnic/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,7 @@ dist_opaldata_DATA = \
help-mpi-btl-usnic.txt

test_sources = \
test/btl_usnic_component_test.h \
test/btl_usnic_graph_test.h
test/btl_usnic_component_test.h

sources = \
btl_usnic_compat.h \
Expand All @@ -50,8 +49,6 @@ sources = \
btl_usnic_endpoint.h \
btl_usnic_frag.c \
btl_usnic_frag.h \
btl_usnic_graph.h \
btl_usnic_graph.c \
btl_usnic_hwloc.c \
btl_usnic_hwloc.h \
btl_usnic_map.c \
Expand Down
3 changes: 1 addition & 2 deletions opal/mca/btl/usnic/btl_usnic_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -149,8 +150,6 @@ usnic_compat_proc_name_compare(opal_process_name_t a,
# define opal_btl_usnic_ack_segment_t ompi_btl_usnic_ack_segment_t
# define opal_btl_usnic_ack_segment_t_class ompi_btl_usnic_ack_segment_t_class

# define opal_btl_usnic_graph_t ompi_btl_usnic_graph_t

# define opal_btl_usnic_run_tests ompi_btl_usnic_run_tests

# define USNIC_SEND_LOCAL des_src
Expand Down
32 changes: 16 additions & 16 deletions opal/mca/btl/usnic/btl_usnic_proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@
#include "opal/util/arch.h"
#include "opal/util/show_help.h"
#include "opal/constants.h"
#include "opal/util/bipartite_graph.h"

#include "btl_usnic_compat.h"
#include "btl_usnic.h"
#include "btl_usnic_proc.h"
#include "btl_usnic_endpoint.h"
#include "btl_usnic_module.h"
#include "btl_usnic_util.h"
#include "btl_usnic_graph.h"

/* larger weight values are more desirable (i.e., worth, not cost) */
enum {
Expand Down Expand Up @@ -427,13 +427,13 @@ static void edge_pairs_to_match_table(
static int create_proc_module_graph(
opal_btl_usnic_proc_t *proc,
bool proc_is_left,
opal_btl_usnic_graph_t **g_out)
opal_bp_graph_t **g_out)
{
int err;
int i, j;
int u, v;
int num_modules;
opal_btl_usnic_graph_t *g = NULL;
opal_bp_graph_t *g = NULL;

if (NULL == g_out) {
return OPAL_ERR_BAD_PARAM;
Expand All @@ -444,7 +444,7 @@ static int create_proc_module_graph(

/* Construct a bipartite graph with remote interfaces on the one side and
* local interfaces (modules) on the other. */
err = opal_btl_usnic_gr_create(NULL, NULL, &g);
err = opal_bp_graph_create(NULL, NULL, &g);
if (OPAL_SUCCESS != err) {
OPAL_ERROR_LOG(err);
goto out;
Expand All @@ -453,9 +453,9 @@ static int create_proc_module_graph(
/* create vertices for each interface (local and remote) */
for (i = 0; i < num_modules; ++i) {
int idx = -1;
err = opal_btl_usnic_gr_add_vertex(g,
mca_btl_usnic_component.usnic_active_modules[i],
&idx);
err = opal_bp_graph_add_vertex(g,
mca_btl_usnic_component.usnic_active_modules[i],
&idx);
if (OPAL_SUCCESS != err) {
OPAL_ERROR_LOG(err);
goto out_free_graph;
Expand All @@ -464,7 +464,7 @@ static int create_proc_module_graph(
}
for (i = 0; i < (int)proc->proc_modex_count; ++i) {
int idx = -1;
err = opal_btl_usnic_gr_add_vertex(g, &proc->proc_modex[i], &idx);
err = opal_bp_graph_add_vertex(g, &proc->proc_modex[i], &idx);
if (OPAL_SUCCESS != err) {
OPAL_ERROR_LOG(err);
goto out_free_graph;
Expand Down Expand Up @@ -509,9 +509,9 @@ static int create_proc_module_graph(
opal_output_verbose(20, USNIC_OUT,
"btl:usnic:%s: adding edge (%d,%d) with cost=%" PRIi64 " for edge module[%d] <--> endpoint[%d]",
__func__, u, v, cost, i, j);
err = opal_btl_usnic_gr_add_edge(g, u, v, cost,
/*capacity=*/1,
/*e_data=*/NULL);
err = opal_bp_graph_add_edge(g, u, v, cost,
/*capacity=*/1,
/*e_data=*/NULL);
if (OPAL_SUCCESS != err) {
OPAL_ERROR_LOG(err);
goto out_free_graph;
Expand All @@ -523,7 +523,7 @@ static int create_proc_module_graph(
return OPAL_SUCCESS;

out_free_graph:
opal_btl_usnic_gr_free(g);
opal_bp_graph_free(g);
out:
return err;
}
Expand All @@ -547,7 +547,7 @@ static int match_modex(opal_btl_usnic_module_t *module,
int err = OPAL_SUCCESS;
size_t i;
uint32_t num_modules;
opal_btl_usnic_graph_t *g = NULL;
opal_bp_graph_t *g = NULL;
bool proc_is_left;

if (NULL == index_out) {
Expand Down Expand Up @@ -599,7 +599,7 @@ static int match_modex(opal_btl_usnic_module_t *module,

int nme = 0;
int *me = NULL;
err = opal_btl_usnic_solve_bipartite_assignment(g, &nme, &me);
err = opal_bp_graph_solve_bipartite_assignment(g, &nme, &me);
if (OPAL_SUCCESS != err) {
OPAL_ERROR_LOG(err);
goto out_free_graph;
Expand All @@ -608,7 +608,7 @@ static int match_modex(opal_btl_usnic_module_t *module,
edge_pairs_to_match_table(proc, proc_is_left, nme, me);
free(me);

err = opal_btl_usnic_gr_free(g);
err = opal_bp_graph_free(g);
if (OPAL_SUCCESS != err) {
OPAL_ERROR_LOG(err);
return err;
Expand Down Expand Up @@ -655,7 +655,7 @@ static int match_modex(opal_btl_usnic_module_t *module,
return (*index_out == -1 ? OPAL_ERR_NOT_FOUND : OPAL_SUCCESS);

out_free_graph:
opal_btl_usnic_gr_free(g);
opal_bp_graph_free(g);
out_free_table:
free(proc->proc_ep_match_table);
proc->proc_ep_match_table = NULL;
Expand Down
Loading

0 comments on commit 40ce261

Please sign in to comment.