Skip to content

Commit

Permalink
Fix ADAPT for a few corner cases
Browse files Browse the repository at this point in the history
Add support for fallback to previous coll module on non-commutative operations (open-mpi#30)
Replace mutexes by atomic operations.
Use the correct nbc request type (open-mpi#31)
* coll/base: document type casts in ompi_coll_base_retain_*
Other minor fixes.

Signed-off-by: George Bosilca <[email protected]>
Signed-off-by: Joseph Schuchart <[email protected]>
  • Loading branch information
bosilca committed Sep 9, 2020
1 parent 43e3add commit b0de06f
Show file tree
Hide file tree
Showing 6 changed files with 172 additions and 116 deletions.
33 changes: 33 additions & 0 deletions ompi/mca/coll/adapt/coll_adapt.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,44 @@ typedef struct mca_coll_adapt_component_t {

} mca_coll_adapt_component_t;

/*
* Structure used to store what is necessary for the collective operations
* routines in case of fallback.
*
* One instance describes the fallback for a single collective operation: the
* routine to fall back to and a module pointer kept alongside it.
*/
typedef struct mca_coll_adapt_collective_fallback_s {
/* Fallback routine. Only one member is meaningful for a given instance; a
* union is used because each collective has its own function pointer type. */
union {
mca_coll_base_module_reduce_fn_t reduce;
mca_coll_base_module_ireduce_fn_t ireduce;
} previous_routine;
/* Module stored together with previous_routine.
* NOTE(review): presumably the module that provides previous_routine and
* must be handed to it when it is called -- confirm against the code that
* fills in and uses this structure. */
mca_coll_base_module_t *previous_module;
} mca_coll_adapt_collective_fallback_t;


/*
* Identifiers of the collective operations for which a fallback is recorded.
*
* The values are used as indices into the previous_routines[] array of
* struct mca_coll_adapt_module_t (see the previous_* macros in this header).
* ADAPT_COLLCOUNT must stay last: it is not a collective, its value is the
* number of entries before it and it is used as the size of that array.
*/
typedef enum mca_coll_adapt_colltype {
ADAPT_REDUCE = 0,
ADAPT_IREDUCE = 1,
ADAPT_COLLCOUNT
} mca_coll_adapt_colltype_t;

/*
* Some defines to stick to the naming used in the other components in terms of
* fallback routines
*
* Each macro expands to a member-access path that starts at the
* previous_routines[] array, so it is meant to be written after '.' or '->'
* on an object that has such an array (struct mca_coll_adapt_module_t), e.g.
* module->previous_reduce.
*
* These are plain object-like macros: every occurrence of one of these
* identifiers in a file that includes this header is replaced, whatever the
* context.
*/
/* Fallback routine for each collective */
#define previous_reduce previous_routines[ADAPT_REDUCE].previous_routine.reduce
#define previous_ireduce previous_routines[ADAPT_IREDUCE].previous_routine.ireduce

/* Module pointer stored with the fallback routine of each collective */
#define previous_reduce_module previous_routines[ADAPT_REDUCE].previous_module
#define previous_ireduce_module previous_routines[ADAPT_IREDUCE].previous_module


/* Coll adapt module, one per communicator */
struct mca_coll_adapt_module_t {
/* Base module; kept as the first member of the structure */
mca_coll_base_module_t super;

/* To be able to fall back when a case is not supported: one entry per
* collective, indexed by mca_coll_adapt_colltype_t. The previous_* macros
* defined in this header give named access to the individual entries. */
struct mca_coll_adapt_collective_fallback_s previous_routines[ADAPT_COLLCOUNT];

/* Whether this module has already been lazily initialized (true) or not
* yet (false) */
bool adapt_enabled;
};
Expand Down
8 changes: 2 additions & 6 deletions ompi/mca/coll/adapt/coll_adapt_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,13 @@ struct ompi_coll_adapt_constant_reduce_context_s {
ompi_request_t *request;
int rank;
/* Length of the fragment array, which is the number of received segments */
int32_t num_recv_segs;
opal_atomic_int32_t num_recv_segs;
/* Number of sent segments */
int32_t num_sent_segs;
opal_atomic_int32_t num_sent_segs;
/* Next segment that needs to be received for each child */
opal_atomic_int32_t *next_recv_segs;
/* Mutex to protect recv_list */
opal_mutex_t *mutex_recv_list;
/* Mutex to protect num_recv_segs */
opal_mutex_t *mutex_num_recv_segs;
/* Mutex to protect num_sent */
opal_mutex_t *mutex_num_sent;
/* Mutex to protect each segment when do the reduce op */
opal_mutex_t *mutex_op_list;
/* Reduce operation */
Expand Down
Loading

0 comments on commit b0de06f

Please sign in to comment.