Skip to content

Commit

Permalink
Many fixes and improvements to ADAPT
Browse files Browse the repository at this point in the history
- Add support for fallback to previous coll module on non-commutative operations (#30)
- Replace mutexes by atomic operations.
- Use the correct nbc request type (for both ibcast and ireduce)
  * coll/base: document type casts in ompi_coll_base_retain_*
- add module-wide topology cache
- use standard instead of synchronous send and add mca parameter to control mode of initial send in ireduce/ibcast
- reduce number of memory allocations
- call the default request completion.
  - Remove the requests from the Fortran lookup conversion tables before completing
    and freeing them.

Signed-off-by: George Bosilca <[email protected]>
Signed-off-by: Joseph Schuchart <[email protected]>

Co-authored-by: Joseph Schuchart <[email protected]>
  • Loading branch information
bosilca and devreal committed Sep 18, 2020
1 parent 43e3add commit c98e387
Show file tree
Hide file tree
Showing 11 changed files with 625 additions and 612 deletions.
4 changes: 3 additions & 1 deletion ompi/mca/coll/adapt/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ sources = \
coll_adapt_inbuf.c \
coll_adapt_inbuf.h \
coll_adapt_item.c \
coll_adapt_item.h
coll_adapt_item.h \
coll_adapt_topocache.c \
coll_adapt_topocache.h

# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
Expand Down
53 changes: 51 additions & 2 deletions ompi/mca/coll/adapt/coll_adapt.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/

Expand All @@ -25,6 +25,17 @@ BEGIN_C_DECLS

typedef struct mca_coll_adapt_module_t mca_coll_adapt_module_t;

/*
 * Identifiers of the algorithms known to the adapt component.
 *
 * The values are consecutive and start at 0, so the trailing
 * OMPI_COLL_ADAPT_ALGORITHM_COUNT entry is equal to the number of
 * algorithms listed before it.
 */
typedef enum {
    OMPI_COLL_ADAPT_ALGORITHM_TUNED             = 0,
    OMPI_COLL_ADAPT_ALGORITHM_BINOMIAL          = 1,
    OMPI_COLL_ADAPT_ALGORITHM_IN_ORDER_BINOMIAL = 2,
    OMPI_COLL_ADAPT_ALGORITHM_BINARY            = 3,
    OMPI_COLL_ADAPT_ALGORITHM_PIPELINE          = 4,
    OMPI_COLL_ADAPT_ALGORITHM_CHAIN             = 5,
    OMPI_COLL_ADAPT_ALGORITHM_LINEAR            = 6,
    /* Not an algorithm: number of algorithms above. Keep this entry last! */
    OMPI_COLL_ADAPT_ALGORITHM_COUNT
} ompi_coll_adapt_algorithm_t;

/*
* Structure to hold the adapt coll component. First it holds the
* base coll component, and then holds a bunch of
Expand Down Expand Up @@ -56,6 +67,7 @@ typedef struct mca_coll_adapt_component_t {
size_t adapt_ibcast_segment_size;
int adapt_ibcast_max_send_requests;
int adapt_ibcast_max_recv_requests;
bool adapt_ibcast_synchronous_send;
/* Bcast free list */
opal_free_list_t *adapt_ibcast_context_free_list;

Expand All @@ -67,17 +79,54 @@ typedef struct mca_coll_adapt_component_t {
int adapt_inbuf_free_list_min;
int adapt_inbuf_free_list_max;
int adapt_inbuf_free_list_inc;
bool adapt_ireduce_synchronous_send;

/* Reduce free list */
opal_free_list_t *adapt_ireduce_context_free_list;

} mca_coll_adapt_component_t;

/*
* Per-collective fallback record: what the adapt module stores in order to
* hand a collective operation over to the previous routine in case of
* fallback.
*/
typedef struct mca_coll_adapt_collective_fallback_s {
/* Previous routine for this collective. Only one member of the union is
* meaningful for a given record: use the member that matches the collective
* the record is stored for (reduce for ADAPT_REDUCE, ireduce for
* ADAPT_IREDUCE, as done by the previous_reduce / previous_ireduce macros). */
union {
mca_coll_base_module_reduce_fn_t reduce;
mca_coll_base_module_ireduce_fn_t ireduce;
} previous_routine;
/* Module associated with previous_routine -- presumably the module argument
* to pass when calling that routine; confirm against the call sites. */
mca_coll_base_module_t *previous_module;
} mca_coll_adapt_collective_fallback_t;


/*
 * Collectives for which the adapt module keeps a fallback record.
 *
 * The values are used as indices into the module's previous_routines[]
 * array, whose length is ADAPT_COLLCOUNT.
 */
enum mca_coll_adapt_colltype {
    ADAPT_REDUCE = 0,
    ADAPT_IREDUCE,
    ADAPT_COLLCOUNT   /* number of collectives above; keep last */
};
typedef enum mca_coll_adapt_colltype mca_coll_adapt_colltype_t;

/*
* Some defines to stick to the naming used in the other components in terms of
* fallback routines.
*
* Each macro expands to a member path rooted at a previous_routines[] array,
* so it is meant to be used after a module pointer or object, e.g.
* module->previous_reduce or module->previous_reduce_module.
*
* NOTE: these are object-like macros with generic names; every later
* occurrence of one of these identifiers in a translation unit that sees this
* header is replaced by the preprocessor.
*/
#define previous_reduce previous_routines[ADAPT_REDUCE].previous_routine.reduce
#define previous_ireduce previous_routines[ADAPT_IREDUCE].previous_routine.ireduce

/* Module stored alongside the corresponding previous routine */
#define previous_reduce_module previous_routines[ADAPT_REDUCE].previous_module
#define previous_ireduce_module previous_routines[ADAPT_IREDUCE].previous_module


/* Coll adapt module, one per communicator */
struct mca_coll_adapt_module_t {
/* Base module */
mca_coll_base_module_t super;

/* Fallback records, one per entry of mca_coll_adapt_colltype_t, kept to be
* able to fall back when a case is not supported. The previous_reduce /
* previous_ireduce (and *_module) macros name the individual slots. */
struct mca_coll_adapt_collective_fallback_s previous_routines[ADAPT_COLLCOUNT];

/* Cached topologies. Held through a pointer; who allocates and releases the
* list is not visible here -- TODO confirm. */
opal_list_t *topo_cache;

/* Whether this module has already been lazily initialized */
bool adapt_enabled;
};
Expand Down
22 changes: 19 additions & 3 deletions ompi/mca/coll/adapt/coll_adapt_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,31 @@
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/

#include "ompi/mca/coll/coll.h"
#include "coll_adapt_context.h"


/*
 * Constructor for ompi_coll_adapt_constant_reduce_context_t.
 *
 * Runs the OPAL constructors of the three objects embedded in the context:
 * the recv_list, the mutex associated with it, and the inbuf free list.
 * No other field of the context is touched here.
 */
static void adapt_constant_reduce_context_construct(ompi_coll_adapt_constant_reduce_context_t *context)
{
    /* the lock, then the list it is meant to protect */
    OBJ_CONSTRUCT(&context->mutex_recv_list, opal_mutex_t);
    OBJ_CONSTRUCT(&context->recv_list, opal_list_t);

    /* inbuf free list */
    OBJ_CONSTRUCT(&context->inbuf_list, opal_free_list_t);
}

/*
 * Destructor for ompi_coll_adapt_constant_reduce_context_t.
 *
 * Runs the OPAL destructors of the three objects embedded in the context:
 * the inbuf free list, the recv_list, and the mutex associated with that
 * list. No other field of the context is released here.
 */
static void adapt_constant_reduce_context_destruct(ompi_coll_adapt_constant_reduce_context_t *context)
{
    /* inbuf free list */
    OBJ_DESTRUCT(&context->inbuf_list);

    /* the list, then the lock associated with it */
    OBJ_DESTRUCT(&context->recv_list);
    OBJ_DESTRUCT(&context->mutex_recv_list);
}


/* Class instance for the bcast context: derives from opal_free_list_item_t
* and registers neither a constructor nor a destructor (both NULL). */
OBJ_CLASS_INSTANCE(ompi_coll_adapt_bcast_context_t, opal_free_list_item_t,
NULL, NULL);

Expand All @@ -23,4 +38,5 @@ OBJ_CLASS_INSTANCE(ompi_coll_adapt_reduce_context_t, opal_free_list_item_t,
NULL, NULL);

OBJ_CLASS_INSTANCE(ompi_coll_adapt_constant_reduce_context_t, opal_object_t,
NULL, NULL);
&adapt_constant_reduce_context_construct,
&adapt_constant_reduce_context_destruct);
38 changes: 16 additions & 22 deletions ompi/mca/coll/adapt/coll_adapt_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/

Expand Down Expand Up @@ -74,41 +74,35 @@ struct ompi_coll_adapt_constant_reduce_context_s {
/* Increment of each segment */
int segment_increment;
int num_segs;
ompi_request_t *request;
int rank;
int root;
/* The distance between the address of inbuf->buff and the address of inbuf */
int distance;
int ireduce_tag;
/* How many sends are posted but not finished */
int32_t ongoing_send;
/* Length of the fragment array, which is the number of received segments */
int32_t num_recv_segs;
/* Number of sent segments */
int32_t num_sent_segs;
/* Next segment that needs to be received for each child */
opal_atomic_int32_t *next_recv_segs;
/* Mutex to protect recv_list */
opal_mutex_t *mutex_recv_list;
/* Mutex to protect num_recv_segs */
opal_mutex_t *mutex_num_recv_segs;
/* Mutex to protect num_sent */
opal_mutex_t *mutex_num_sent;
int32_t *next_recv_segs;
/* Mutex to protect each segment when doing the reduce op */
opal_mutex_t *mutex_op_list;
/* Reduce operation */
ompi_op_t *op;
ompi_coll_tree_t *tree;
/* Accumulate buff */
char **accumbuf;
/* inbuf list address of accumbuf */
ompi_coll_adapt_inbuf_t ** accumbuf_to_inbuf;
opal_free_list_t *inbuf_list;
/* A list to store the segments that have been received but not yet sent */
opal_list_t *recv_list;
ptrdiff_t lower_bound;
/* How many sends are posted but not finished */
opal_atomic_int32_t ongoing_send;
char *sbuf;
char *rbuf;
int root;
/* The distance between the address of inbuf->buff and the address of inbuf */
int distance;
int ireduce_tag;
opal_free_list_t inbuf_list;
/* Mutex to protect recv_list */
opal_mutex_t mutex_recv_list;
/* A list to store the segments that have been received but not yet sent */
opal_list_t recv_list;
ompi_request_t *request;
};

typedef struct ompi_coll_adapt_constant_reduce_context_s ompi_coll_adapt_constant_reduce_context_t;
Expand All @@ -123,7 +117,7 @@ typedef int (*ompi_coll_adapt_reduce_cuda_callback_fn_t) (ompi_coll_adapt_reduce
struct ompi_coll_adapt_reduce_context_s {
opal_free_list_item_t super;
char *buff;
int frag_id;
int seg_index;
int child_id;
int peer;
ompi_coll_adapt_constant_reduce_context_t *con;
Expand Down
Loading

0 comments on commit c98e387

Please sign in to comment.