diff --git a/ompi/mca/coll/libnbc/coll_libnbc.h b/ompi/mca/coll/libnbc/coll_libnbc.h index 724b86d678a..b66ce60fa5e 100644 --- a/ompi/mca/coll/libnbc/coll_libnbc.h +++ b/ompi/mca/coll/libnbc/coll_libnbc.h @@ -13,7 +13,7 @@ * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science + * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ @@ -76,6 +76,7 @@ struct ompi_coll_libnbc_component_t { opal_list_t active_requests; int32_t active_comms; opal_atomic_lock_t progress_lock; + opal_mutex_t lock; }; typedef struct ompi_coll_libnbc_component_t ompi_coll_libnbc_component_t; diff --git a/ompi/mca/coll/libnbc/coll_libnbc_component.c b/ompi/mca/coll/libnbc/coll_libnbc_component.c index 8670181bb71..fe8bdcc902a 100644 --- a/ompi/mca/coll/libnbc/coll_libnbc_component.c +++ b/ompi/mca/coll/libnbc/coll_libnbc_component.c @@ -91,6 +91,7 @@ libnbc_open(void) OBJ_CONSTRUCT(&mca_coll_libnbc_component.requests, opal_free_list_t); OBJ_CONSTRUCT(&mca_coll_libnbc_component.active_requests, opal_list_t); + OBJ_CONSTRUCT(&mca_coll_libnbc_component.lock, opal_mutex_t); ret = opal_free_list_init (&mca_coll_libnbc_component.requests, sizeof(ompi_coll_libnbc_request_t), 8, OBJ_CLASS(ompi_coll_libnbc_request_t), @@ -115,6 +116,7 @@ libnbc_close(void) OBJ_DESTRUCT(&mca_coll_libnbc_component.requests); OBJ_DESTRUCT(&mca_coll_libnbc_component.active_requests); + OBJ_DESTRUCT(&mca_coll_libnbc_component.lock); return OMPI_SUCCESS; } @@ -263,13 +265,17 @@ ompi_coll_libnbc_progress(void) if (opal_atomic_trylock(&mca_coll_libnbc_component.progress_lock)) return 0; + OPAL_THREAD_LOCK(&mca_coll_libnbc_component.lock); OPAL_LIST_FOREACH_SAFE(request, next, &mca_coll_libnbc_component.active_requests, ompi_coll_libnbc_request_t) { + OPAL_THREAD_UNLOCK(&mca_coll_libnbc_component.lock); res = NBC_Progress(request); if( NBC_CONTINUE != res ) { /* done, remove and complete */ + OPAL_THREAD_LOCK(&mca_coll_libnbc_component.lock); opal_list_remove_item(&mca_coll_libnbc_component.active_requests, &request->super.super.super); + OPAL_THREAD_UNLOCK(&mca_coll_libnbc_component.lock); if( OMPI_SUCCESS == res || NBC_OK == res || NBC_SUCCESS == res ) { request->super.req_status.MPI_ERROR = OMPI_SUCCESS; @@ -281,7 +287,9 @@ ompi_coll_libnbc_progress(void) ompi_request_complete(&request->super, true); OPAL_THREAD_UNLOCK(&ompi_request_lock); } + OPAL_THREAD_LOCK(&mca_coll_libnbc_component.lock); } + OPAL_THREAD_UNLOCK(&mca_coll_libnbc_component.lock); opal_atomic_unlock(&mca_coll_libnbc_component.progress_lock); diff --git a/ompi/mca/coll/libnbc/nbc.c b/ompi/mca/coll/libnbc/nbc.c index 430b506564d..7949fe1b90f 100644 --- a/ompi/mca/coll/libnbc/nbc.c +++ b/ompi/mca/coll/libnbc/nbc.c @@ -702,7 +702,9 @@ int NBC_Start(NBC_Handle *handle, NBC_Schedule *schedule) { if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; } + OPAL_THREAD_LOCK(&mca_coll_libnbc_component.lock); opal_list_append(&mca_coll_libnbc_component.active_requests, &(handle->super.super.super)); + OPAL_THREAD_UNLOCK(&mca_coll_libnbc_component.lock); return OMPI_SUCCESS; }