From 97eed54bcdeccd49f59b399670a362551755d08c Mon Sep 17 00:00:00 2001 From: bsergentm Date: Wed, 6 May 2020 18:30:03 +0200 Subject: [PATCH] Coll/adapt Bull (#15) * piggybacking Bull functionalities * coll/adapt: Fix naming conventions and C11 atomic use This commit fixes some naming convention issues, such as function names which should follow the naming ompi_coll_adapt instead of mca_coll_adapt, reserved for component and module naming (cf. tuned collective component); It also fixes the use of _Atomic construct, which is only valid in C11. OPAL constructs have already been adapted to that use, so use opal_atomic_* types instead. * coll/adapt: Remove unused component field in module This commit removes an unneeded field referencing the component in the module of adapt, as it is already available through the mca_coll_adapt_component global variable. Signed-off-by: Marc Sergent Co-authored-by: Lemarinier, Pierre Co-authored-by: pierrele <31764860+pierrele@users.noreply.github.com> --- ompi/mca/coll/adapt/coll_adapt.h | 25 ++- ompi/mca/coll/adapt/coll_adapt_algorithms.h | 56 ++++--- ompi/mca/coll/adapt/coll_adapt_bcast.c | 4 +- ompi/mca/coll/adapt/coll_adapt_component.c | 82 ++++++---- ompi/mca/coll/adapt/coll_adapt_context.c | 42 +++-- ompi/mca/coll/adapt/coll_adapt_context.h | 40 ++--- ompi/mca/coll/adapt/coll_adapt_ibcast.c | 127 ++++++++------- ompi/mca/coll/adapt/coll_adapt_inbuf.c | 8 +- ompi/mca/coll/adapt/coll_adapt_inbuf.h | 6 +- ompi/mca/coll/adapt/coll_adapt_ireduce.c | 163 +++++++++++--------- ompi/mca/coll/adapt/coll_adapt_item.c | 8 +- ompi/mca/coll/adapt/coll_adapt_item.h | 6 +- ompi/mca/coll/adapt/coll_adapt_module.c | 68 ++++---- ompi/mca/coll/adapt/coll_adapt_reduce.c | 4 +- 14 files changed, 343 insertions(+), 296 deletions(-) diff --git a/ompi/mca/coll/adapt/coll_adapt.h b/ompi/mca/coll/adapt/coll_adapt.h index 0eaca96e5e7..b2a8fcb949c 100644 --- a/ompi/mca/coll/adapt/coll_adapt.h +++ b/ompi/mca/coll/adapt/coll_adapt.h @@ -21,13 +21,15 @@ #include 
"ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/coll_base_topo.h" -BEGIN_C_DECLS typedef struct mca_coll_adapt_module_t mca_coll_adapt_module_t; +BEGIN_C_DECLS + +typedef struct mca_coll_adapt_module_t mca_coll_adapt_module_t; /* * Structure to hold the adapt coll component. First it holds the * base coll component, and then holds a bunch of * adapt-coll-component-specific stuff (e.g., current MCA param - * values). + * values). */ typedef struct mca_coll_adapt_component_t { /* Base coll component */ @@ -45,7 +47,7 @@ typedef struct mca_coll_adapt_component_t { /* MCA parameter: Minimum number of segment in context free list */ int adapt_context_free_list_min; - /* MCA parameter: Increasment number of segment in context free list */ + /* MCA parameter: Increment number of segment in context free list */ int adapt_context_free_list_inc; /* Bcast MCA parameter */ @@ -55,7 +57,7 @@ typedef struct mca_coll_adapt_component_t { int adapt_ibcast_max_recv_requests; /* Bcast free list */ opal_free_list_t *adapt_ibcast_context_free_list; - _Atomic int32_t adapt_ibcast_context_free_list_enabled; + opal_atomic_int32_t adapt_ibcast_context_free_list_enabled; /* Reduce MCA parameter */ int adapt_ireduce_algorithm; @@ -68,7 +70,7 @@ typedef struct mca_coll_adapt_component_t { /* Reduce free list */ opal_free_list_t *adapt_ireduce_context_free_list; - _Atomic int32_t adapt_ireduce_context_free_list_enabled; + opal_atomic_int32_t adapt_ireduce_context_free_list_enabled; } mca_coll_adapt_component_t; @@ -78,9 +80,7 @@ struct mca_coll_adapt_module_t { mca_coll_base_module_t super; /* Whether this module has been lazily initialized or not yet */ - bool enabled; - /* Pointer to mca_coll_adapt_component */ - mca_coll_adapt_component_t *adapt_component; + bool adapt_enabled; }; OBJ_CLASS_DECLARATION(mca_coll_adapt_module_t); @@ -88,11 +88,10 @@ OBJ_CLASS_DECLARATION(mca_coll_adapt_module_t); OMPI_MODULE_DECLSPEC extern mca_coll_adapt_component_t mca_coll_adapt_component; /* ADAPT 
module functions */ -int mca_coll_adapt_init_query(bool enable_progress_threads, bool enable_mpi_threads); - -mca_coll_base_module_t *mca_coll_adapt_comm_query(struct ompi_communicator_t *comm, int *priority); +int ompi_coll_adapt_init_query(bool enable_progress_threads, bool enable_mpi_threads); +mca_coll_base_module_t * ompi_coll_adapt_comm_query(struct ompi_communicator_t *comm, int *priority); /* Free ADAPT quest */ -int adapt_request_free(ompi_request_t ** request); +int ompi_coll_adapt_request_free(ompi_request_t **request); -#endif /* MCA_COLL_ADAPT_EXPORT_H */ +#endif /* MCA_COLL_ADAPT_EXPORT_H */ diff --git a/ompi/mca/coll/adapt/coll_adapt_algorithms.h b/ompi/mca/coll/adapt/coll_adapt_algorithms.h index 8b7b7cebd4f..f0b67b787d8 100644 --- a/ompi/mca/coll/adapt/coll_adapt_algorithms.h +++ b/ompi/mca/coll/adapt/coll_adapt_algorithms.h @@ -14,82 +14,88 @@ #include "ompi/mca/coll/base/coll_base_functions.h" #include -typedef struct mca_coll_adapt_algorithm_index_s { +typedef struct ompi_coll_adapt_algorithm_index_s { int algorithm_index; uintptr_t algorithm_fn_ptr; -} mca_coll_adapt_algorithm_index_t; +} ompi_coll_adapt_algorithm_index_t; /* Bcast */ -int mca_coll_adapt_ibcast_init(void); -int mca_coll_adapt_ibcast_fini(void); -int mca_coll_adapt_bcast(void *buff, int count, struct ompi_datatype_t *datatype, int root, +int ompi_coll_adapt_ibcast_init(void); +int ompi_coll_adapt_ibcast_fini(void); +int ompi_coll_adapt_bcast(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t * module); -int mca_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *datatype, int root, +int ompi_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module); -int mca_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t *datatype, int root, +int 
ompi_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, ompi_coll_tree_t * tree, size_t seg_size, int ibcast_tag); -int mca_coll_adapt_ibcast_binomial(void *buff, int count, struct ompi_datatype_t *datatype, +int ompi_coll_adapt_ibcast_binomial(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ibcast_tag); -int mca_coll_adapt_ibcast_in_order_binomial(void *buff, int count, struct ompi_datatype_t *datatype, +int ompi_coll_adapt_ibcast_in_order_binomial(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ibcast_tag); -int mca_coll_adapt_ibcast_binary(void *buff, int count, struct ompi_datatype_t *datatype, int root, +int ompi_coll_adapt_ibcast_binary(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ibcast_tag); -int mca_coll_adapt_ibcast_pipeline(void *buff, int count, struct ompi_datatype_t *datatype, +int ompi_coll_adapt_ibcast_pipeline(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ibcast_tag); -int mca_coll_adapt_ibcast_chain(void *buff, int count, struct ompi_datatype_t *datatype, int root, +int ompi_coll_adapt_ibcast_chain(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ibcast_tag); -int mca_coll_adapt_ibcast_linear(void *buff, int count, struct ompi_datatype_t *datatype, int root, +int ompi_coll_adapt_ibcast_linear(void *buff, int count, struct 
ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ibcast_tag); - +int ompi_coll_adapt_ibcast_tuned(void *buff, int count, struct ompi_datatype_t *datatype, int root, + struct ompi_communicator_t *comm, ompi_request_t ** request, + mca_coll_base_module_t *module, int ibcast_tag); /* Reduce */ -int mca_coll_adapt_ireduce_init(void); -int mca_coll_adapt_ireduce_fini(void); -int mca_coll_adapt_reduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, +int ompi_coll_adapt_ireduce_init(void); +int ompi_coll_adapt_ireduce_fini(void); +int ompi_coll_adapt_reduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t * module); -int mca_coll_adapt_ireduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, +int ompi_coll_adapt_ireduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module); -int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count, +int ompi_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, ompi_coll_tree_t * tree, size_t seg_size, int ireduce_tag); -int mca_coll_adapt_ireduce_binomial(const void *sbuf, void *rbuf, int count, +int ompi_coll_adapt_ireduce_tuned(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, + struct ompi_communicator_t *comm, ompi_request_t ** request, + mca_coll_base_module_t *module, int ireduce_tag); +int ompi_coll_adapt_ireduce_binomial(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t 
*op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ireduce_tag); -int mca_coll_adapt_ireduce_in_order_binomial(const void *sbuf, void *rbuf, int count, +int ompi_coll_adapt_ireduce_in_order_binomial(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ireduce_tag); -int mca_coll_adapt_ireduce_binary(const void *sbuf, void *rbuf, int count, +int ompi_coll_adapt_ireduce_binary(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ireduce_tag); -int mca_coll_adapt_ireduce_pipeline(const void *sbuf, void *rbuf, int count, +int ompi_coll_adapt_ireduce_pipeline(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ireduce_tag); -int mca_coll_adapt_ireduce_chain(const void *sbuf, void *rbuf, int count, +int ompi_coll_adapt_ireduce_chain(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ireduce_tag); -int mca_coll_adapt_ireduce_linear(const void *sbuf, void *rbuf, int count, +int ompi_coll_adapt_ireduce_linear(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ireduce_tag); diff --git a/ompi/mca/coll/adapt/coll_adapt_bcast.c b/ompi/mca/coll/adapt/coll_adapt_bcast.c index 4348f2dc3b5..604898b2e54 100644 --- a/ompi/mca/coll/adapt/coll_adapt_bcast.c +++ 
b/ompi/mca/coll/adapt/coll_adapt_bcast.c @@ -12,14 +12,14 @@ #include "coll_adapt.h" #include "coll_adapt_algorithms.h" -int mca_coll_adapt_bcast(void *buff, int count, struct ompi_datatype_t *datatype, int root, +int ompi_coll_adapt_bcast(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t * module) { if (count == 0) { return MPI_SUCCESS; } else { ompi_request_t *request; - int err = mca_coll_adapt_ibcast(buff, count, datatype, root, comm, &request, module); + int err = ompi_coll_adapt_ibcast(buff, count, datatype, root, comm, &request, module); ompi_request_wait(&request, MPI_STATUS_IGNORE); return err; } diff --git a/ompi/mca/coll/adapt/coll_adapt_component.c b/ompi/mca/coll/adapt/coll_adapt_component.c index 6079c4d92ea..d38cd42b42b 100644 --- a/ompi/mca/coll/adapt/coll_adapt_component.c +++ b/ompi/mca/coll/adapt/coll_adapt_component.c @@ -36,35 +36,32 @@ static int adapt_register(void); */ mca_coll_adapt_component_t mca_coll_adapt_component = { - /* First, fill in the super */ - { - /* First, the mca_component_t struct containing meta - information about the component itself */ - - { - MCA_COLL_BASE_VERSION_2_0_0, - - /* Component name and version */ - "adapt", - OMPI_MAJOR_VERSION, - OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION, - - /* Component functions */ - adapt_open, /* open */ - adapt_close, - NULL, /* query */ - adapt_register}, - { - /* The component is not checkpoint ready */ - MCA_BASE_METADATA_PARAM_NONE}, - - /* Initialization / querying functions */ - mca_coll_adapt_init_query, - mca_coll_adapt_comm_query, - }, + /* First, the mca_component_t struct containing meta + information about the component itself */ + .collm_version = { + MCA_COLL_BASE_VERSION_2_0_0, + + /* Component name and version */ + .mca_component_name = "adapt", + MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION), + + /* Component functions */ + .mca_open_component 
= adapt_open, + .mca_close_component = adapt_close, + .mca_register_component_params = adapt_register, + }, + .collm_data = { + /* The component is not checkpoint ready */ + MCA_BASE_METADATA_PARAM_NONE + }, + + /* Initialization / querying functions */ + .collm_init_query = ompi_coll_adapt_init_query, + .collm_comm_query = ompi_coll_adapt_comm_query, + }, /* adapt-component specific information */ @@ -81,6 +78,25 @@ mca_coll_adapt_component_t mca_coll_adapt_component = { /* Open the component */ static int adapt_open(void) { + int param; + mca_coll_adapt_component_t *cs = &mca_coll_adapt_component; + + /* + * Get the global coll verbosity: it will be ours + */ + param = mca_base_var_find("ompi", "coll", "base", "verbose"); + if (param >= 0) { + const int *verbose = NULL; + mca_base_var_get_value(param, &verbose, NULL, NULL); + if (verbose && verbose[0] > 0) { + cs->adapt_output = opal_output_open(NULL); + opal_output_set_verbosity(cs->adapt_output, verbose[0]); + } + } + + opal_output_verbose(1, cs->adapt_output, + "coll:adapt:component_open: done!"); + return OMPI_SUCCESS; } @@ -88,8 +104,8 @@ static int adapt_open(void) /* Shut down the component */ static int adapt_close(void) { - mca_coll_adapt_ibcast_fini(); - mca_coll_adapt_ireduce_fini(); + ompi_coll_adapt_ibcast_fini(); + ompi_coll_adapt_ireduce_fini(); return OMPI_SUCCESS; } @@ -125,7 +141,7 @@ static int adapt_register(void) opal_output_set_verbosity(cs->adapt_output, adapt_verbose); cs->adapt_context_free_list_min = 10; - (void) mca_base_component_var_register(c, "context_free_list_max", + (void) mca_base_component_var_register(c, "context_free_list_min", "Minimum number of segments in context free list", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, @@ -133,7 +149,7 @@ static int adapt_register(void) &cs->adapt_context_free_list_min); cs->adapt_context_free_list_max = 10000; - (void) mca_base_component_var_register(c, "context_free_list_min", + (void) mca_base_component_var_register(c, 
"context_free_list_max", "Maximum number of segments in context free list", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, @@ -147,8 +163,8 @@ static int adapt_register(void) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &cs->adapt_context_free_list_inc); - mca_coll_adapt_ibcast_init(); - mca_coll_adapt_ireduce_init(); + ompi_coll_adapt_ibcast_init(); + ompi_coll_adapt_ireduce_init(); return adapt_verify_mca_variables(); } diff --git a/ompi/mca/coll/adapt/coll_adapt_context.c b/ompi/mca/coll/adapt/coll_adapt_context.c index 978739df9ab..be03127f23c 100644 --- a/ompi/mca/coll/adapt/coll_adapt_context.c +++ b/ompi/mca/coll/adapt/coll_adapt_context.c @@ -12,60 +12,58 @@ #include "ompi/mca/coll/coll.h" #include "coll_adapt_context.h" -static void mca_coll_adapt_bcast_context_constructor(mca_coll_adapt_bcast_context_t * bcast_context) +static void ompi_coll_adapt_bcast_context_constructor(ompi_coll_adapt_bcast_context_t * bcast_context) { } -static void mca_coll_adapt_bcast_context_destructor(mca_coll_adapt_bcast_context_t * bcast_context) +static void ompi_coll_adapt_bcast_context_destructor(ompi_coll_adapt_bcast_context_t * bcast_context) { - } static void -mca_coll_adapt_constant_bcast_context_constructor(mca_coll_adapt_constant_bcast_context_t * con) +ompi_coll_adapt_constant_bcast_context_constructor(ompi_coll_adapt_constant_bcast_context_t * con) { } -static void mca_coll_adapt_constant_bcast_context_destructor(mca_coll_adapt_constant_bcast_context_t +static void ompi_coll_adapt_constant_bcast_context_destructor(ompi_coll_adapt_constant_bcast_context_t * con) { } -OBJ_CLASS_INSTANCE(mca_coll_adapt_bcast_context_t, opal_free_list_item_t, - mca_coll_adapt_bcast_context_constructor, - mca_coll_adapt_bcast_context_destructor); +OBJ_CLASS_INSTANCE(ompi_coll_adapt_bcast_context_t, opal_free_list_item_t, + ompi_coll_adapt_bcast_context_constructor, + ompi_coll_adapt_bcast_context_destructor); -OBJ_CLASS_INSTANCE(mca_coll_adapt_constant_bcast_context_t, 
opal_object_t, - mca_coll_adapt_constant_bcast_context_constructor, - mca_coll_adapt_constant_bcast_context_destructor); +OBJ_CLASS_INSTANCE(ompi_coll_adapt_constant_bcast_context_t, opal_object_t, + ompi_coll_adapt_constant_bcast_context_constructor, + ompi_coll_adapt_constant_bcast_context_destructor); -static void mca_coll_adapt_reduce_context_constructor(mca_coll_adapt_reduce_context_t * +static void ompi_coll_adapt_reduce_context_constructor(ompi_coll_adapt_reduce_context_t * reduce_context) { } -static void mca_coll_adapt_reduce_context_destructor(mca_coll_adapt_reduce_context_t * +static void ompi_coll_adapt_reduce_context_destructor(ompi_coll_adapt_reduce_context_t * reduce_context) { - } static void -mca_coll_adapt_constant_reduce_context_constructor(mca_coll_adapt_constant_reduce_context_t * con) +ompi_coll_adapt_constant_reduce_context_constructor(ompi_coll_adapt_constant_reduce_context_t * con) { } static void -mca_coll_adapt_constant_reduce_context_destructor(mca_coll_adapt_constant_reduce_context_t * con) +ompi_coll_adapt_constant_reduce_context_destructor(ompi_coll_adapt_constant_reduce_context_t * con) { } -OBJ_CLASS_INSTANCE(mca_coll_adapt_reduce_context_t, opal_free_list_item_t, - mca_coll_adapt_reduce_context_constructor, - mca_coll_adapt_reduce_context_destructor); +OBJ_CLASS_INSTANCE(ompi_coll_adapt_reduce_context_t, opal_free_list_item_t, + ompi_coll_adapt_reduce_context_constructor, + ompi_coll_adapt_reduce_context_destructor); -OBJ_CLASS_INSTANCE(mca_coll_adapt_constant_reduce_context_t, opal_object_t, - mca_coll_adapt_constant_reduce_context_constructor, - mca_coll_adapt_constant_reduce_context_destructor); +OBJ_CLASS_INSTANCE(ompi_coll_adapt_constant_reduce_context_t, opal_object_t, + ompi_coll_adapt_constant_reduce_context_constructor, + ompi_coll_adapt_constant_reduce_context_destructor); diff --git a/ompi/mca/coll/adapt/coll_adapt_context.h b/ompi/mca/coll/adapt/coll_adapt_context.h index 917e3d48861..eea98fb872e 100644 --- 
a/ompi/mca/coll/adapt/coll_adapt_context.h +++ b/ompi/mca/coll/adapt/coll_adapt_context.h @@ -19,7 +19,7 @@ #include "coll_adapt_inbuf.h" /* Bcast constant context in bcast context */ -struct mca_coll_adapt_constant_bcast_context_s { +struct ompi_coll_adapt_constant_bcast_context_s { opal_object_t super; int root; size_t count; @@ -42,29 +42,29 @@ struct mca_coll_adapt_constant_bcast_context_s { int ibcast_tag; }; -typedef struct mca_coll_adapt_constant_bcast_context_s mca_coll_adapt_constant_bcast_context_t; +typedef struct ompi_coll_adapt_constant_bcast_context_s ompi_coll_adapt_constant_bcast_context_t; -OBJ_CLASS_DECLARATION(mca_coll_adapt_constant_bcast_context_t); +OBJ_CLASS_DECLARATION(ompi_coll_adapt_constant_bcast_context_t); /* Bcast context of each segment*/ -typedef struct mca_coll_adapt_bcast_context_s mca_coll_adapt_bcast_context_t; +typedef struct ompi_coll_adapt_bcast_context_s ompi_coll_adapt_bcast_context_t; -typedef int (*mca_coll_adapt_bcast_cuda_callback_fn_t) (mca_coll_adapt_bcast_context_t * context); +typedef int (*ompi_coll_adapt_bcast_cuda_callback_fn_t) (ompi_coll_adapt_bcast_context_t * context); -struct mca_coll_adapt_bcast_context_s { +struct ompi_coll_adapt_bcast_context_s { opal_free_list_item_t super; char *buff; int frag_id; int child_id; int peer; - mca_coll_adapt_constant_bcast_context_t *con; + ompi_coll_adapt_constant_bcast_context_t *con; }; -OBJ_CLASS_DECLARATION(mca_coll_adapt_bcast_context_t); +OBJ_CLASS_DECLARATION(ompi_coll_adapt_bcast_context_t); /* Reduce constant context in reduce context */ -struct mca_coll_adapt_constant_reduce_context_s { +struct ompi_coll_adapt_constant_reduce_context_s { opal_object_t super; size_t count; size_t seg_count; @@ -81,7 +81,7 @@ struct mca_coll_adapt_constant_reduce_context_s { /* Number of sent segments */ int32_t num_sent_segs; /* Next seg need to be received for every children */ - _Atomic int32_t *next_recv_segs; + opal_atomic_int32_t *next_recv_segs; /* Mutex to protect recv_list 
*/ opal_mutex_t *mutex_recv_list; /* Mutex to protect num_recv_segs */ @@ -95,12 +95,14 @@ struct mca_coll_adapt_constant_reduce_context_s { ompi_coll_tree_t *tree; /* Accumulate buff */ char **accumbuf; + /* inbuf list address of accumbuf */ + ompi_coll_adapt_inbuf_t ** accumbuf_to_inbuf; opal_free_list_t *inbuf_list; /* A list to store the segments which are received and not yet be sent */ opal_list_t *recv_list; ptrdiff_t lower_bound; /* How many sends are posted but not finished */ - _Atomic int32_t ongoing_send; + opal_atomic_int32_t ongoing_send; char *sbuf; char *rbuf; int root; @@ -109,24 +111,24 @@ struct mca_coll_adapt_constant_reduce_context_s { int ireduce_tag; }; -typedef struct mca_coll_adapt_constant_reduce_context_s mca_coll_adapt_constant_reduce_context_t; +typedef struct ompi_coll_adapt_constant_reduce_context_s ompi_coll_adapt_constant_reduce_context_t; -OBJ_CLASS_DECLARATION(mca_coll_adapt_constant_reduce_context_t); +OBJ_CLASS_DECLARATION(ompi_coll_adapt_constant_reduce_context_t); /* Reduce context of each segment */ -typedef struct mca_coll_adapt_reduce_context_s mca_coll_adapt_reduce_context_t; +typedef struct ompi_coll_adapt_reduce_context_s ompi_coll_adapt_reduce_context_t; -typedef int (*mca_coll_adapt_reduce_cuda_callback_fn_t) (mca_coll_adapt_reduce_context_t * context); +typedef int (*ompi_coll_adapt_reduce_cuda_callback_fn_t) (ompi_coll_adapt_reduce_context_t * context); -struct mca_coll_adapt_reduce_context_s { +struct ompi_coll_adapt_reduce_context_s { opal_free_list_item_t super; char *buff; int frag_id; int child_id; int peer; - mca_coll_adapt_constant_reduce_context_t *con; + ompi_coll_adapt_constant_reduce_context_t *con; /* store the incoming segment */ - mca_coll_adapt_inbuf_t *inbuf; + ompi_coll_adapt_inbuf_t *inbuf; }; -OBJ_CLASS_DECLARATION(mca_coll_adapt_reduce_context_t); +OBJ_CLASS_DECLARATION(ompi_coll_adapt_reduce_context_t); diff --git a/ompi/mca/coll/adapt/coll_adapt_ibcast.c b/ompi/mca/coll/adapt/coll_adapt_ibcast.c 
index 3582bafcb62..c3f0868102a 100644 --- a/ompi/mca/coll/adapt/coll_adapt_ibcast.c +++ b/ompi/mca/coll/adapt/coll_adapt_ibcast.c @@ -21,33 +21,35 @@ #include "ompi/mca/pml/ob1/pml_ob1.h" -typedef int (*mca_coll_adapt_ibcast_fn_t) (void *buff, +typedef int (*ompi_coll_adapt_ibcast_fn_t) (void *buff, int count, struct ompi_datatype_t * datatype, int root, struct ompi_communicator_t * comm, ompi_request_t ** request, - mca_coll_base_module_t * module, int ibcast_tag); - -static mca_coll_adapt_algorithm_index_t mca_coll_adapt_ibcast_algorithm_index[] = { - {1, (uintptr_t) mca_coll_adapt_ibcast_binomial}, - {2, (uintptr_t) mca_coll_adapt_ibcast_in_order_binomial}, - {3, (uintptr_t) mca_coll_adapt_ibcast_binary}, - {4, (uintptr_t) mca_coll_adapt_ibcast_pipeline}, - {5, (uintptr_t) mca_coll_adapt_ibcast_chain}, - {6, (uintptr_t) mca_coll_adapt_ibcast_linear}, + mca_coll_base_module_t * module, + int ibcast_tag); + +static ompi_coll_adapt_algorithm_index_t ompi_coll_adapt_ibcast_algorithm_index[] = { + {0, (uintptr_t) ompi_coll_adapt_ibcast_tuned}, + {1, (uintptr_t) ompi_coll_adapt_ibcast_binomial}, + {2, (uintptr_t) ompi_coll_adapt_ibcast_in_order_binomial}, + {3, (uintptr_t) ompi_coll_adapt_ibcast_binary}, + {4, (uintptr_t) ompi_coll_adapt_ibcast_pipeline}, + {5, (uintptr_t) ompi_coll_adapt_ibcast_chain}, + {6, (uintptr_t) ompi_coll_adapt_ibcast_linear}, }; /* * Set up MCA parameters of MPI_Bcast and MPI_IBcast */ -int mca_coll_adapt_ibcast_init(void) +int ompi_coll_adapt_ibcast_init(void) { mca_base_component_t *c = &mca_coll_adapt_component.super.collm_version; mca_coll_adapt_component.adapt_ibcast_algorithm = 1; mca_base_component_var_register(c, "bcast_algorithm", - "Algorithm of broadcast, 1: binomial, 2: in_order_binomial, 3: binary, 4: pipeline, 5: chain, 6: linear", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + "Algorithm of broadcast, 0: tuned, 1: binomial, 2: in_order_binomial, 3: binary, 4: pipeline, 5: chain, 6: linear", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, 
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY, &mca_coll_adapt_component.adapt_ibcast_algorithm); @@ -81,15 +83,15 @@ int mca_coll_adapt_ibcast_init(void) } /* - * Release the free list created in mca_coll_adapt_ibcast_generic + * Release the free list created in ompi_coll_adapt_ibcast_generic */ -int mca_coll_adapt_ibcast_fini(void) +int ompi_coll_adapt_ibcast_fini(void) { if (NULL != mca_coll_adapt_component.adapt_ibcast_context_free_list) { OBJ_RELEASE(mca_coll_adapt_component.adapt_ibcast_context_free_list); mca_coll_adapt_component.adapt_ibcast_context_free_list = NULL; mca_coll_adapt_component.adapt_ibcast_context_free_list_enabled = 0; - OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "ibcast fini\n")); + OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "ibcast fini\n")); } return OMPI_SUCCESS; } @@ -97,7 +99,7 @@ int mca_coll_adapt_ibcast_fini(void) /* * Finish a ibcast request */ -static int ibcast_request_fini(mca_coll_adapt_bcast_context_t * context) +static int ibcast_request_fini(ompi_coll_adapt_bcast_context_t * context) { ompi_request_t *temp_req = context->con->request; if (context->con->tree->tree_nextsize != 0) { @@ -121,8 +123,8 @@ static int ibcast_request_fini(mca_coll_adapt_bcast_context_t * context) */ static int send_cb(ompi_request_t * req) { - mca_coll_adapt_bcast_context_t *context = - (mca_coll_adapt_bcast_context_t *) req->req_complete_cb_data; + ompi_coll_adapt_bcast_context_t *context = + (ompi_coll_adapt_bcast_context_t *) req->req_complete_cb_data; int err; @@ -136,10 +138,11 @@ static int send_cb(ompi_request_t * req) /* If the current process has fragments in recv_array can be sent */ if (sent_id < context->con->num_recv_segs) { ompi_request_t *send_req; + ompi_coll_adapt_bcast_context_t *send_context; + opal_free_list_t *free_list; int new_id = context->con->recv_array[sent_id]; - mca_coll_adapt_bcast_context_t *send_context = - (mca_coll_adapt_bcast_context_t *) 
opal_free_list_wait(mca_coll_adapt_component. - adapt_ibcast_context_free_list); + free_list = mca_coll_adapt_component.adapt_ibcast_context_free_list; + send_context = (ompi_coll_adapt_bcast_context_t *) opal_free_list_wait(free_list); send_context->buff = context->buff + (new_id - context->frag_id) * context->con->real_seg_size; send_context->frag_id = new_id; @@ -206,8 +209,8 @@ static int send_cb(ompi_request_t * req) static int recv_cb(ompi_request_t * req) { /* Get necessary info from request */ - mca_coll_adapt_bcast_context_t *context = - (mca_coll_adapt_bcast_context_t *) req->req_complete_cb_data; + ompi_coll_adapt_bcast_context_t *context = + (ompi_coll_adapt_bcast_context_t *) req->req_complete_cb_data; int err, i; OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, @@ -220,14 +223,15 @@ static int recv_cb(ompi_request_t * req) int num_recv_segs_t = ++(context->con->num_recv_segs); context->con->recv_array[num_recv_segs_t - 1] = context->frag_id; + opal_free_list_t *free_list; int new_id = num_recv_segs_t + mca_coll_adapt_component.adapt_ibcast_max_recv_requests - 1; /* Receive new segment */ if (new_id < context->con->num_segs) { ompi_request_t *recv_req; + ompi_coll_adapt_bcast_context_t *recv_context; + free_list = mca_coll_adapt_component.adapt_ibcast_context_free_list; /* Get new context item from free list */ - mca_coll_adapt_bcast_context_t *recv_context = - (mca_coll_adapt_bcast_context_t *) opal_free_list_wait(mca_coll_adapt_component. 
- adapt_ibcast_context_free_list); + recv_context = (ompi_coll_adapt_bcast_context_t *) opal_free_list_wait(free_list); recv_context->buff = context->buff + (new_id - context->frag_id) * context->con->real_seg_size; recv_context->frag_id = new_id; @@ -266,9 +270,9 @@ static int recv_cb(ompi_request_t * req) send_count = context->con->count - context->frag_id * context->con->seg_count; } - mca_coll_adapt_bcast_context_t *send_context = - (mca_coll_adapt_bcast_context_t *) opal_free_list_wait(mca_coll_adapt_component. - adapt_ibcast_context_free_list); + ompi_coll_adapt_bcast_context_t *send_context; + free_list = mca_coll_adapt_component.adapt_ibcast_context_free_list; + send_context = (ompi_coll_adapt_bcast_context_t *) opal_free_list_wait(free_list); send_context->buff = context->buff; send_context->frag_id = context->frag_id; send_context->child_id = i; @@ -326,7 +330,7 @@ static int recv_cb(ompi_request_t * req) return 1; } -int mca_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *datatype, int root, +int ompi_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module) { @@ -335,7 +339,7 @@ int mca_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *datatyp temp_request = OBJ_NEW(ompi_request_t); OMPI_REQUEST_INIT(temp_request, false); temp_request->req_type = 0; - temp_request->req_free = adapt_request_free; + temp_request->req_free = ompi_coll_adapt_request_free; temp_request->req_status.MPI_SOURCE = 0; temp_request->req_status.MPI_TAG = 0; temp_request->req_status.MPI_ERROR = 0; @@ -356,9 +360,9 @@ int mca_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *datatyp } int ibcast_tag = opal_atomic_add_fetch_32(&(comm->c_ibcast_tag), 1); ibcast_tag = ibcast_tag % 4096; - mca_coll_adapt_ibcast_fn_t bcast_func = - (mca_coll_adapt_ibcast_fn_t) - 
mca_coll_adapt_ibcast_algorithm_index[mca_coll_adapt_component.adapt_ibcast_algorithm]. + ompi_coll_adapt_ibcast_fn_t bcast_func = + (ompi_coll_adapt_ibcast_fn_t) + ompi_coll_adapt_ibcast_algorithm_index[mca_coll_adapt_component.adapt_ibcast_algorithm]. algorithm_fn_ptr; return bcast_func(buff, count, datatype, root, comm, request, module, ibcast_tag); } @@ -367,72 +371,81 @@ int mca_coll_adapt_ibcast(void *buff, int count, struct ompi_datatype_t *datatyp /* * Ibcast functions with different algorithms */ -int mca_coll_adapt_ibcast_binomial(void *buff, int count, struct ompi_datatype_t *datatype, +int ompi_coll_adapt_ibcast_tuned(void *buff, int count, struct ompi_datatype_t *datatype, + int root, struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module, int ibcast_tag) +{ + OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "tuned not implemented\n")); + return OMPI_SUCCESS; +} + +int ompi_coll_adapt_ibcast_binomial(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ibcast_tag) { ompi_coll_tree_t *tree = ompi_coll_base_topo_build_bmtree(comm, root); int err = - mca_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree, + ompi_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree, mca_coll_adapt_component.adapt_ibcast_segment_size, ibcast_tag); return err; } -int mca_coll_adapt_ibcast_in_order_binomial(void *buff, int count, struct ompi_datatype_t *datatype, +int ompi_coll_adapt_ibcast_in_order_binomial(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ibcast_tag) { ompi_coll_tree_t *tree = ompi_coll_base_topo_build_in_order_bmtree(comm, root); int err = - mca_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, 
module, tree, + ompi_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree, mca_coll_adapt_component.adapt_ibcast_segment_size, ibcast_tag); return err; } -int mca_coll_adapt_ibcast_binary(void *buff, int count, struct ompi_datatype_t *datatype, int root, +int ompi_coll_adapt_ibcast_binary(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ibcast_tag) { ompi_coll_tree_t *tree = ompi_coll_base_topo_build_tree(2, comm, root); int err = - mca_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree, + ompi_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree, mca_coll_adapt_component.adapt_ibcast_segment_size, ibcast_tag); return err; } -int mca_coll_adapt_ibcast_pipeline(void *buff, int count, struct ompi_datatype_t *datatype, +int ompi_coll_adapt_ibcast_pipeline(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ibcast_tag) { ompi_coll_tree_t *tree = ompi_coll_base_topo_build_chain(1, comm, root); int err = - mca_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree, + ompi_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree, mca_coll_adapt_component.adapt_ibcast_segment_size, ibcast_tag); return err; } -int mca_coll_adapt_ibcast_chain(void *buff, int count, struct ompi_datatype_t *datatype, int root, +int ompi_coll_adapt_ibcast_chain(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ibcast_tag) { ompi_coll_tree_t *tree = ompi_coll_base_topo_build_chain(4, comm, root); int err = - mca_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree, + 
ompi_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree, mca_coll_adapt_component.adapt_ibcast_segment_size, ibcast_tag); return err; } -int mca_coll_adapt_ibcast_linear(void *buff, int count, struct ompi_datatype_t *datatype, int root, +int ompi_coll_adapt_ibcast_linear(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ibcast_tag) { @@ -446,14 +459,14 @@ int mca_coll_adapt_ibcast_linear(void *buff, int count, struct ompi_datatype_t * tree = ompi_coll_base_topo_build_tree(MAXTREEFANOUT, comm, root); } int err = - mca_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree, + ompi_coll_adapt_ibcast_generic(buff, count, datatype, root, comm, request, module, tree, mca_coll_adapt_component.adapt_ibcast_segment_size, ibcast_tag); return err; } -int mca_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t *datatype, int root, +int ompi_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, ompi_coll_tree_t * tree, size_t seg_size, int ibcast_tag) @@ -494,9 +507,9 @@ int mca_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t if (1 == context_free_list_enabled) { mca_coll_adapt_component.adapt_ibcast_context_free_list = OBJ_NEW(opal_free_list_t); opal_free_list_init(mca_coll_adapt_component.adapt_ibcast_context_free_list, - sizeof(mca_coll_adapt_bcast_context_t), + sizeof(ompi_coll_adapt_bcast_context_t), opal_cache_line_size, - OBJ_CLASS(mca_coll_adapt_bcast_context_t), + OBJ_CLASS(ompi_coll_adapt_bcast_context_t), 0, opal_cache_line_size, mca_coll_adapt_component.adapt_context_free_list_min, mca_coll_adapt_component.adapt_context_free_list_max, @@ -510,7 +523,7 @@ int mca_coll_adapt_ibcast_generic(void *buff, int count, struct 
ompi_datatype_t OMPI_REQUEST_INIT(temp_request, false); temp_request->req_state = OMPI_REQUEST_ACTIVE; temp_request->req_type = 0; - temp_request->req_free = adapt_request_free; + temp_request->req_free = ompi_coll_adapt_request_free; temp_request->req_status.MPI_SOURCE = 0; temp_request->req_status.MPI_TAG = 0; temp_request->req_status.MPI_ERROR = 0; @@ -540,7 +553,7 @@ int mca_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t } /* Set constant context for send and recv call back */ - mca_coll_adapt_constant_bcast_context_t *con = OBJ_NEW(mca_coll_adapt_constant_bcast_context_t); + ompi_coll_adapt_constant_bcast_context_t *con = OBJ_NEW(ompi_coll_adapt_constant_bcast_context_t); con->root = root; con->count = count; con->seg_count = seg_count; @@ -582,7 +595,7 @@ int mca_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t recv_array[i] = i; } con->num_recv_segs = num_segs; - /* Set send_array, will send adapt_ibcast_max_send_requests segments */ + /* Set send_array, will send ompi_coll_adapt_ibcast_max_send_requests segments */ for (i = 0; i < tree->tree_nextsize; i++) { send_array[i] = mca_coll_adapt_component.adapt_ibcast_max_send_requests; } @@ -595,8 +608,8 @@ int mca_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t send_count = count - i * seg_count; } for (j = 0; j < tree->tree_nextsize; j++) { - mca_coll_adapt_bcast_context_t *context = - (mca_coll_adapt_bcast_context_t *) opal_free_list_wait(mca_coll_adapt_component. + ompi_coll_adapt_bcast_context_t *context = + (ompi_coll_adapt_bcast_context_t *) opal_free_list_wait(mca_coll_adapt_component. 
adapt_ibcast_context_free_list); context->buff = (char *) buff + i * real_seg_size; context->frag_id = i; @@ -656,8 +669,8 @@ int mca_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t if (i == (num_segs - 1)) { recv_count = count - i * seg_count; } - mca_coll_adapt_bcast_context_t *context = - (mca_coll_adapt_bcast_context_t *) opal_free_list_wait(mca_coll_adapt_component. + ompi_coll_adapt_bcast_context_t *context = + (ompi_coll_adapt_bcast_context_t *) opal_free_list_wait(mca_coll_adapt_component. adapt_ibcast_context_free_list); context->buff = (char *) buff + i * real_seg_size; context->frag_id = i; @@ -691,4 +704,4 @@ int mca_coll_adapt_ibcast_generic(void *buff, int count, struct ompi_datatype_t "[%d]: End of Ibcast\n", rank)); return MPI_SUCCESS; -} \ No newline at end of file +} diff --git a/ompi/mca/coll/adapt/coll_adapt_inbuf.c b/ompi/mca/coll/adapt/coll_adapt_inbuf.c index 79162966624..a1723ac13d0 100644 --- a/ompi/mca/coll/adapt/coll_adapt_inbuf.c +++ b/ompi/mca/coll/adapt/coll_adapt_inbuf.c @@ -12,13 +12,13 @@ #include "coll_adapt.h" #include "coll_adapt_inbuf.h" -static void mca_coll_adapt_inbuf_constructor(mca_coll_adapt_inbuf_t * inbuf) +static void ompi_coll_adapt_inbuf_constructor(ompi_coll_adapt_inbuf_t * inbuf) { } -static void mca_coll_adapt_inbuf_destructor(mca_coll_adapt_inbuf_t * inbuf) +static void ompi_coll_adapt_inbuf_destructor(ompi_coll_adapt_inbuf_t * inbuf) { } -OBJ_CLASS_INSTANCE(mca_coll_adapt_inbuf_t, opal_free_list_item_t, mca_coll_adapt_inbuf_constructor, - mca_coll_adapt_inbuf_destructor); +OBJ_CLASS_INSTANCE(ompi_coll_adapt_inbuf_t, opal_free_list_item_t, ompi_coll_adapt_inbuf_constructor, + ompi_coll_adapt_inbuf_destructor); diff --git a/ompi/mca/coll/adapt/coll_adapt_inbuf.h b/ompi/mca/coll/adapt/coll_adapt_inbuf.h index 1d450e59ff7..93c3060333b 100644 --- a/ompi/mca/coll/adapt/coll_adapt_inbuf.h +++ b/ompi/mca/coll/adapt/coll_adapt_inbuf.h @@ -14,13 +14,13 @@ #include "opal/class/opal_free_list.h" 
-struct mca_coll_adapt_inbuf_s { +struct ompi_coll_adapt_inbuf_s { opal_free_list_item_t super; char buff[1]; }; -typedef struct mca_coll_adapt_inbuf_s mca_coll_adapt_inbuf_t; +typedef struct ompi_coll_adapt_inbuf_s ompi_coll_adapt_inbuf_t; -OBJ_CLASS_DECLARATION(mca_coll_adapt_inbuf_t); +OBJ_CLASS_DECLARATION(ompi_coll_adapt_inbuf_t); #endif /* MCA_COLL_ADAPT_INBUF_H */ diff --git a/ompi/mca/coll/adapt/coll_adapt_ireduce.c b/ompi/mca/coll/adapt/coll_adapt_ireduce.c index d99bb87f998..f90c14874f8 100644 --- a/ompi/mca/coll/adapt/coll_adapt_ireduce.c +++ b/ompi/mca/coll/adapt/coll_adapt_ireduce.c @@ -24,7 +24,7 @@ /* MPI_Reduce and MPI_Ireduce in the ADAPT module only work for commutative operations */ -typedef int (*mca_coll_adapt_ireduce_fn_t) (const void *sbuf, +typedef int (*ompi_coll_adapt_ireduce_fn_t) (const void *sbuf, void *rbuf, int count, struct ompi_datatype_t * datatype, @@ -34,19 +34,20 @@ typedef int (*mca_coll_adapt_ireduce_fn_t) (const void *sbuf, ompi_request_t ** request, mca_coll_base_module_t * module, int ireduce_tag); -static mca_coll_adapt_algorithm_index_t mca_coll_adapt_ireduce_algorithm_index[] = { - {1, (uintptr_t) mca_coll_adapt_ireduce_binomial}, - {2, (uintptr_t) mca_coll_adapt_ireduce_in_order_binomial}, - {3, (uintptr_t) mca_coll_adapt_ireduce_binary}, - {4, (uintptr_t) mca_coll_adapt_ireduce_pipeline}, - {5, (uintptr_t) mca_coll_adapt_ireduce_chain}, - {6, (uintptr_t) mca_coll_adapt_ireduce_linear}, +static ompi_coll_adapt_algorithm_index_t ompi_coll_adapt_ireduce_algorithm_index[] = { + {0, (uintptr_t)ompi_coll_adapt_ireduce_tuned}, + {1, (uintptr_t) ompi_coll_adapt_ireduce_binomial}, + {2, (uintptr_t) ompi_coll_adapt_ireduce_in_order_binomial}, + {3, (uintptr_t) ompi_coll_adapt_ireduce_binary}, + {4, (uintptr_t) ompi_coll_adapt_ireduce_pipeline}, + {5, (uintptr_t) ompi_coll_adapt_ireduce_chain}, + {6, (uintptr_t) ompi_coll_adapt_ireduce_linear}, }; /* * Set up MCA parameters of MPI_Reduce and MPI_Ireduce */ -int 
mca_coll_adapt_ireduce_init(void) +int ompi_coll_adapt_ireduce_init(void) { mca_base_component_t *c = &mca_coll_adapt_component.super.collm_version; @@ -111,9 +112,9 @@ int mca_coll_adapt_ireduce_init(void) } /* - * Release the free list created in mca_coll_adapt_ireduce_generic + * Release the free list created in ompi_coll_adapt_ireduce_generic */ -int mca_coll_adapt_ireduce_fini(void) +int ompi_coll_adapt_ireduce_fini(void) { if (NULL != mca_coll_adapt_component.adapt_ireduce_context_free_list) { OBJ_RELEASE(mca_coll_adapt_component.adapt_ireduce_context_free_list); @@ -127,15 +128,15 @@ int mca_coll_adapt_ireduce_fini(void) /* * Functions to access list */ -static mca_coll_adapt_item_t *get_next_ready_item(opal_list_t * list, int num_children) +static ompi_coll_adapt_item_t *get_next_ready_item(opal_list_t * list, int num_children) { - mca_coll_adapt_item_t *item; + ompi_coll_adapt_item_t *item; if (opal_list_is_empty(list)) { return NULL; } - for (item = (mca_coll_adapt_item_t *) opal_list_get_first(list); - item != (mca_coll_adapt_item_t *) opal_list_get_end(list); - item = (mca_coll_adapt_item_t *) ((opal_list_item_t *) item)->opal_list_next) { + for (item = (ompi_coll_adapt_item_t *) opal_list_get_first(list); + item != (ompi_coll_adapt_item_t *) opal_list_get_end(list); + item = (ompi_coll_adapt_item_t *) ((opal_list_item_t *) item)->opal_list_next) { if (item->count == num_children) { opal_list_remove_item(list, (opal_list_item_t *) item); return item; @@ -146,11 +147,11 @@ static mca_coll_adapt_item_t *get_next_ready_item(opal_list_t * list, int num_ch static int add_to_list(opal_list_t * list, int id) { - mca_coll_adapt_item_t *item; + ompi_coll_adapt_item_t *item; int ret = 0; - for (item = (mca_coll_adapt_item_t *) opal_list_get_first(list); - item != (mca_coll_adapt_item_t *) opal_list_get_end(list); - item = (mca_coll_adapt_item_t *) ((opal_list_item_t *) item)->opal_list_next) { + for (item = (ompi_coll_adapt_item_t *) opal_list_get_first(list); + 
item != (ompi_coll_adapt_item_t *) opal_list_get_end(list); + item = (ompi_coll_adapt_item_t *) ((opal_list_item_t *) item)->opal_list_next) { if (item->id == id) { (item->count)++; ret = 1; @@ -158,7 +159,7 @@ static int add_to_list(opal_list_t * list, int id) } } if (ret == 0) { - item = OBJ_NEW(mca_coll_adapt_item_t); + item = OBJ_NEW(ompi_coll_adapt_item_t); item->id = id; item->count = 1; opal_list_append(list, (opal_list_item_t *) item); @@ -172,15 +173,15 @@ static int add_to_list(opal_list_t * list, int id) /* * Get the inbuf address */ -static mca_coll_adapt_inbuf_t *to_inbuf(char *buf, int distance) +static ompi_coll_adapt_inbuf_t *to_inbuf(char *buf, int distance) { - return (mca_coll_adapt_inbuf_t *) (buf - distance); + return (ompi_coll_adapt_inbuf_t *) (buf - distance); } /* * Finish a ireduce request */ -static int ireduce_request_fini(mca_coll_adapt_reduce_context_t * context) +static int ireduce_request_fini(ompi_coll_adapt_reduce_context_t * context) { /* Return the allocated recourses */ int i; @@ -227,8 +228,8 @@ static int ireduce_request_fini(mca_coll_adapt_reduce_context_t * context) */ static int send_cb(ompi_request_t * req) { - mca_coll_adapt_reduce_context_t *context = - (mca_coll_adapt_reduce_context_t *) req->req_complete_cb_data; + ompi_coll_adapt_reduce_context_t *context = + (ompi_coll_adapt_reduce_context_t *) req->req_complete_cb_data; OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "[%d]: ireduce_send_cb, peer %d, seg_id %d\n", context->con->rank, context->peer, context->frag_id)); @@ -238,14 +239,14 @@ static int send_cb(ompi_request_t * req) /* Send a new segment */ OPAL_THREAD_LOCK(context->con->mutex_recv_list); - mca_coll_adapt_item_t *item = + ompi_coll_adapt_item_t *item = get_next_ready_item(context->con->recv_list, context->con->tree->tree_nextsize); OPAL_THREAD_UNLOCK(context->con->mutex_recv_list); if (item != NULL) { /* Get new context item from free list */ - mca_coll_adapt_reduce_context_t 
*send_context = - (mca_coll_adapt_reduce_context_t *) opal_free_list_wait(mca_coll_adapt_component. + ompi_coll_adapt_reduce_context_t *send_context = + (ompi_coll_adapt_reduce_context_t *) opal_free_list_wait(mca_coll_adapt_component. adapt_ireduce_context_free_list); if (context->con->tree->tree_nextsize > 0) { send_context->buff = context->con->accumbuf[item->id]; @@ -316,8 +317,8 @@ static int send_cb(ompi_request_t * req) */ static int recv_cb(ompi_request_t * req) { - mca_coll_adapt_reduce_context_t *context = - (mca_coll_adapt_reduce_context_t *) req->req_complete_cb_data; + ompi_coll_adapt_reduce_context_t *context = + (ompi_coll_adapt_reduce_context_t *) req->req_complete_cb_data; OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "[%d]: ireduce_recv_cb, peer %d, seg_id %d\n", context->con->rank, context->peer, context->frag_id)); @@ -329,7 +330,7 @@ static int recv_cb(ompi_request_t * req) /* Receive new segment */ if (new_id < context->con->num_segs) { char *temp_recv_buf = NULL; - mca_coll_adapt_inbuf_t *inbuf = NULL; + ompi_coll_adapt_inbuf_t *inbuf = NULL; /* Set inbuf, if it it first child, recv on rbuf, else recv on inbuf */ if (context->child_id == 0 && context->con->sbuf != MPI_IN_PLACE && context->con->root == context->con->rank) { @@ -339,12 +340,12 @@ static int recv_cb(ompi_request_t * req) } else { OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "[%d]: In recv_cb, alloc inbuf\n", context->con->rank)); - inbuf = (mca_coll_adapt_inbuf_t *) opal_free_list_wait(context->con->inbuf_list); + inbuf = (ompi_coll_adapt_inbuf_t *) opal_free_list_wait(context->con->inbuf_list); temp_recv_buf = inbuf->buff - context->con->lower_bound; } /* Get new context item from free list */ - mca_coll_adapt_reduce_context_t *recv_context = - (mca_coll_adapt_reduce_context_t *) opal_free_list_wait(mca_coll_adapt_component. 
+ ompi_coll_adapt_reduce_context_t *recv_context = + (ompi_coll_adapt_reduce_context_t *) opal_free_list_wait(mca_coll_adapt_component. adapt_ireduce_context_free_list); recv_context->buff = temp_recv_buf; recv_context->frag_id = new_id; @@ -372,7 +373,7 @@ static int recv_cb(ompi_request_t * req) if (MPI_SUCCESS != err) { return err; } - /* Invoke recvive call back */ + /* Invoke receive call back */ ompi_request_set_callback(recv_req, recv_cb, recv_context); } @@ -443,14 +444,14 @@ static int recv_cb(ompi_request_t * req) if (context->con->rank != context->con->tree->tree_root && context->con->ongoing_send < mca_coll_adapt_component.adapt_ireduce_max_send_requests) { OPAL_THREAD_LOCK(context->con->mutex_recv_list); - mca_coll_adapt_item_t *item = + ompi_coll_adapt_item_t *item = get_next_ready_item(context->con->recv_list, context->con->tree->tree_nextsize); OPAL_THREAD_UNLOCK(context->con->mutex_recv_list); if (item != NULL) { - /* Gt new context item from free list */ - mca_coll_adapt_reduce_context_t *send_context = - (mca_coll_adapt_reduce_context_t *) opal_free_list_wait(mca_coll_adapt_component. + /* Get new context item from free list */ + ompi_coll_adapt_reduce_context_t *send_context = + (ompi_coll_adapt_reduce_context_t *) opal_free_list_wait(mca_coll_adapt_component. 
adapt_ireduce_context_free_list); send_context->buff = context->con->accumbuf[context->frag_id]; send_context->frag_id = item->id; @@ -523,7 +524,7 @@ static int recv_cb(ompi_request_t * req) return 1; } -int mca_coll_adapt_ireduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, +int ompi_coll_adapt_ireduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module) { @@ -543,9 +544,9 @@ int mca_coll_adapt_ireduce(const void *sbuf, void *rbuf, int count, struct ompi_ int ireduce_tag = opal_atomic_add_fetch_32(&(comm->c_ireduce_tag), 1); ireduce_tag = (ireduce_tag % 4096) + 4096; fflush(stdout); - mca_coll_adapt_ireduce_fn_t reduce_func = - (mca_coll_adapt_ireduce_fn_t) - mca_coll_adapt_ireduce_algorithm_index[mca_coll_adapt_component. + ompi_coll_adapt_ireduce_fn_t reduce_func = + (ompi_coll_adapt_ireduce_fn_t) + ompi_coll_adapt_ireduce_algorithm_index[mca_coll_adapt_component. 
adapt_ireduce_algorithm].algorithm_fn_ptr; return reduce_func(sbuf, rbuf, count, dtype, op, root, comm, request, module, ireduce_tag); } @@ -554,20 +555,30 @@ int mca_coll_adapt_ireduce(const void *sbuf, void *rbuf, int count, struct ompi_ /* * Ireduce functions with different algorithms */ -int mca_coll_adapt_ireduce_binomial(const void *sbuf, void *rbuf, int count, +int ompi_coll_adapt_ireduce_tuned(const void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, struct ompi_op_t *op, + int root, struct ompi_communicator_t *comm, + ompi_request_t ** request, + mca_coll_base_module_t *module, int ireduce_tag) +{ + OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output, "tuned not implemented\n")); + return OMPI_SUCCESS; +} + +int ompi_coll_adapt_ireduce_binomial(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ireduce_tag) { ompi_coll_tree_t *tree = ompi_coll_base_topo_build_bmtree(comm, root); int err = - mca_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module, + ompi_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module, tree, mca_coll_adapt_component.adapt_ireduce_segment_size, ireduce_tag); return err; } -int mca_coll_adapt_ireduce_in_order_binomial(const void *sbuf, void *rbuf, int count, +int ompi_coll_adapt_ireduce_in_order_binomial(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, @@ -575,53 +586,53 @@ int mca_coll_adapt_ireduce_in_order_binomial(const void *sbuf, void *rbuf, int c { ompi_coll_tree_t *tree = ompi_coll_base_topo_build_in_order_bmtree(comm, root); int err = - mca_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module, + ompi_coll_adapt_ireduce_generic(sbuf, rbuf, count, 
dtype, op, root, comm, request, module, tree, mca_coll_adapt_component.adapt_ireduce_segment_size, ireduce_tag); return err; } -int mca_coll_adapt_ireduce_binary(const void *sbuf, void *rbuf, int count, +int ompi_coll_adapt_ireduce_binary(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ireduce_tag) { ompi_coll_tree_t *tree = ompi_coll_base_topo_build_tree(2, comm, root); int err = - mca_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module, + ompi_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module, tree, mca_coll_adapt_component.adapt_ireduce_segment_size, ireduce_tag); return err; } -int mca_coll_adapt_ireduce_pipeline(const void *sbuf, void *rbuf, int count, +int ompi_coll_adapt_ireduce_pipeline(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ireduce_tag) { ompi_coll_tree_t *tree = ompi_coll_base_topo_build_chain(1, comm, root); int err = - mca_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module, + ompi_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module, tree, mca_coll_adapt_component.adapt_ireduce_segment_size, ireduce_tag); return err; } -int mca_coll_adapt_ireduce_chain(const void *sbuf, void *rbuf, int count, +int ompi_coll_adapt_ireduce_chain(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ireduce_tag) { ompi_coll_tree_t *tree = ompi_coll_base_topo_build_chain(4, comm, root); int err = - mca_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module, 
+ ompi_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module, tree, mca_coll_adapt_component.adapt_ireduce_segment_size, ireduce_tag); return err; } -int mca_coll_adapt_ireduce_linear(const void *sbuf, void *rbuf, int count, +int ompi_coll_adapt_ireduce_linear(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, int ireduce_tag) @@ -636,14 +647,14 @@ int mca_coll_adapt_ireduce_linear(const void *sbuf, void *rbuf, int count, tree = ompi_coll_base_topo_build_tree(MAXTREEFANOUT, comm, root); } int err = - mca_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module, + ompi_coll_adapt_ireduce_generic(sbuf, rbuf, count, dtype, op, root, comm, request, module, tree, mca_coll_adapt_component.adapt_ireduce_segment_size, ireduce_tag); return err; } -int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count, +int ompi_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request, mca_coll_base_module_t * module, ompi_coll_tree_t * tree, @@ -655,7 +666,7 @@ int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count, size_t typelng; int seg_count = count, num_segs, rank, recv_count, send_count, i, j, err, min, distance = 0; int32_t seg_index; - _Atomic int *next_recv_segs = NULL; + opal_atomic_int_t *next_recv_segs = NULL; /* Used to store the accumuate result, pointer to every segment */ char **accumbuf = NULL; /* A free list contains all recv data */ @@ -686,9 +697,9 @@ int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count, if (1 == context_free_list_enabled) { mca_coll_adapt_component.adapt_ireduce_context_free_list = OBJ_NEW(opal_free_list_t); 
opal_free_list_init(mca_coll_adapt_component.adapt_ireduce_context_free_list, - sizeof(mca_coll_adapt_reduce_context_t), + sizeof(ompi_coll_adapt_reduce_context_t), opal_cache_line_size, - OBJ_CLASS(mca_coll_adapt_reduce_context_t), + OBJ_CLASS(ompi_coll_adapt_reduce_context_t), 0, opal_cache_line_size, mca_coll_adapt_component.adapt_context_free_list_min, mca_coll_adapt_component.adapt_context_free_list_max, @@ -701,18 +712,18 @@ int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count, if (tree->tree_nextsize > 0) { inbuf_list = OBJ_NEW(opal_free_list_t); opal_free_list_init(inbuf_list, - sizeof(mca_coll_adapt_inbuf_t) + real_seg_size, + sizeof(ompi_coll_adapt_inbuf_t) + real_seg_size, opal_cache_line_size, - OBJ_CLASS(mca_coll_adapt_inbuf_t), + OBJ_CLASS(ompi_coll_adapt_inbuf_t), 0, opal_cache_line_size, mca_coll_adapt_component.adapt_inbuf_free_list_min, mca_coll_adapt_component.adapt_inbuf_free_list_max, mca_coll_adapt_component.adapt_inbuf_free_list_inc, NULL, 0, NULL, NULL, NULL); /* Set up next_recv_segs */ - next_recv_segs = (_Atomic int32_t *) malloc(sizeof(int32_t) * tree->tree_nextsize); - mca_coll_adapt_inbuf_t *temp_inbuf = - (mca_coll_adapt_inbuf_t *) opal_free_list_wait(inbuf_list); + next_recv_segs = (opal_atomic_int32_t *) malloc(sizeof(int32_t) * tree->tree_nextsize); + ompi_coll_adapt_inbuf_t *temp_inbuf = + (ompi_coll_adapt_inbuf_t *) opal_free_list_wait(inbuf_list); distance = (char *) temp_inbuf->buff - lower_bound - (char *) temp_inbuf; //address of inbuf->buff to address of inbuf OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "[%d]: distance %d, inbuf %p, inbuf->buff %p, inbuf->buff-lb %p, to_inbuf %p, inbuf_list %p\n", @@ -732,7 +743,7 @@ int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count, OMPI_REQUEST_INIT(temp_request, false); temp_request->req_state = OMPI_REQUEST_ACTIVE; temp_request->req_type = 0; - temp_request->req_free = adapt_request_free; + temp_request->req_free = 
ompi_coll_adapt_request_free; temp_request->req_status.MPI_SOURCE = 0; temp_request->req_status.MPI_TAG = 0; temp_request->req_status.MPI_ERROR = 0; @@ -752,8 +763,8 @@ int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count, recv_list = OBJ_NEW(opal_list_t); /* Set constant context for send and recv call back */ - mca_coll_adapt_constant_reduce_context_t *con = - OBJ_NEW(mca_coll_adapt_constant_reduce_context_t); + ompi_coll_adapt_constant_reduce_context_t *con = + OBJ_NEW(ompi_coll_adapt_constant_reduce_context_t); con->count = count; con->seg_count = seg_count; con->datatype = dtype; @@ -822,21 +833,21 @@ int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count, recv_count = count - (ptrdiff_t) seg_count *(ptrdiff_t) seg_index; } char *temp_recv_buf = NULL; - mca_coll_adapt_inbuf_t *inbuf = NULL; + ompi_coll_adapt_inbuf_t *inbuf = NULL; /* Set inbuf, if it it first child, recv on rbuf, else recv on inbuf */ if (i == 0 && sbuf != MPI_IN_PLACE && root == rank) { temp_recv_buf = (char *) rbuf + (ptrdiff_t) j *(ptrdiff_t) segment_increment; } else { - inbuf = (mca_coll_adapt_inbuf_t *) opal_free_list_wait(inbuf_list); + inbuf = (ompi_coll_adapt_inbuf_t *) opal_free_list_wait(inbuf_list); OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output, "[%d]: In ireduce, alloc inbuf %p\n", rank, (void *) inbuf)); temp_recv_buf = inbuf->buff - lower_bound; } /* Get context */ - mca_coll_adapt_reduce_context_t *context = - (mca_coll_adapt_reduce_context_t *) + ompi_coll_adapt_reduce_context_t *context = + (ompi_coll_adapt_reduce_context_t *) opal_free_list_wait(mca_coll_adapt_component. 
adapt_ireduce_context_free_list); context->buff = temp_recv_buf; @@ -871,10 +882,10 @@ int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count, /* Leaf nodes */ else { - mca_coll_adapt_item_t *item; + ompi_coll_adapt_item_t *item; /* Set up recv_list */ for (seg_index = 0; seg_index < num_segs; seg_index++) { - item = OBJ_NEW(mca_coll_adapt_item_t); + item = OBJ_NEW(ompi_coll_adapt_item_t); item->id = seg_index; item->count = tree->tree_nextsize; opal_list_append(recv_list, (opal_list_item_t *) item); @@ -894,8 +905,8 @@ int mca_coll_adapt_ireduce_generic(const void *sbuf, void *rbuf, int count, if (item->id == (num_segs - 1)) { send_count = count - (ptrdiff_t) seg_count *(ptrdiff_t) item->id; } - mca_coll_adapt_reduce_context_t *context = - (mca_coll_adapt_reduce_context_t *) + ompi_coll_adapt_reduce_context_t *context = + (ompi_coll_adapt_reduce_context_t *) opal_free_list_wait(mca_coll_adapt_component.adapt_ireduce_context_free_list); context->buff = (char *) sbuf + (ptrdiff_t) item->id * (ptrdiff_t) segment_increment; diff --git a/ompi/mca/coll/adapt/coll_adapt_item.c b/ompi/mca/coll/adapt/coll_adapt_item.c index dabe2ce37b8..73258326a66 100644 --- a/ompi/mca/coll/adapt/coll_adapt_item.c +++ b/ompi/mca/coll/adapt/coll_adapt_item.c @@ -11,13 +11,13 @@ #include "coll_adapt_item.h" -static void mca_coll_adapt_item_constructor(mca_coll_adapt_item_t * item) +static void ompi_coll_adapt_item_constructor(ompi_coll_adapt_item_t * item) { } -static void mca_coll_adapt_item_destructor(mca_coll_adapt_item_t * item) +static void ompi_coll_adapt_item_destructor(ompi_coll_adapt_item_t * item) { } -OBJ_CLASS_INSTANCE(mca_coll_adapt_item_t, opal_list_item_t, mca_coll_adapt_item_constructor, - mca_coll_adapt_item_destructor); +OBJ_CLASS_INSTANCE(ompi_coll_adapt_item_t, opal_list_item_t, ompi_coll_adapt_item_constructor, + ompi_coll_adapt_item_destructor); diff --git a/ompi/mca/coll/adapt/coll_adapt_item.h b/ompi/mca/coll/adapt/coll_adapt_item.h index 
2fc6cbdbd03..768f9f29dc0 100644 --- a/ompi/mca/coll/adapt/coll_adapt_item.h +++ b/ompi/mca/coll/adapt/coll_adapt_item.h @@ -12,7 +12,7 @@ #include "opal/class/opal_list.h" #include "coll_adapt_inbuf.h" -struct mca_coll_adapt_item_s { +struct ompi_coll_adapt_item_s { opal_list_item_t super; /* Fragment id */ int id; @@ -20,6 +20,6 @@ struct mca_coll_adapt_item_s { int count; }; -typedef struct mca_coll_adapt_item_s mca_coll_adapt_item_t; +typedef struct ompi_coll_adapt_item_s ompi_coll_adapt_item_t; -OBJ_CLASS_DECLARATION(mca_coll_adapt_item_t); +OBJ_CLASS_DECLARATION(ompi_coll_adapt_item_t); diff --git a/ompi/mca/coll/adapt/coll_adapt_module.c b/ompi/mca/coll/adapt/coll_adapt_module.c index e709313361f..20f27d2ab24 100644 --- a/ompi/mca/coll/adapt/coll_adapt_module.c +++ b/ompi/mca/coll/adapt/coll_adapt_module.c @@ -14,17 +14,17 @@ #include #ifdef HAVE_STRING_H #include -#endif +#endif /* HAVE_STRING_H */ #ifdef HAVE_SCHED_H #include -#endif +#endif /* HAVE_SCHED_H */ #include #ifdef HAVE_SYS_MMAN_H #include -#endif /* HAVE_SYS_MMAN_H */ +#endif /* HAVE_SYS_MMAN_H */ #ifdef HAVE_UNISTD_H #include -#endif /* HAVE_UNISTD_H */ +#endif /* HAVE_UNISTD_H */ #include "mpi.h" #include "opal_stdint.h" @@ -35,7 +35,6 @@ #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/base.h" #include "ompi/mca/coll/base/coll_base_functions.h" -//#include "ompi/mca/rte/rte.h" #include "ompi/proc/proc.h" #include "coll_adapt.h" @@ -47,29 +46,37 @@ /* * Local functions */ -static int adapt_module_enable(mca_coll_base_module_t * module, struct ompi_communicator_t *comm); /* * Module constructor */ -static void mca_coll_adapt_module_construct(mca_coll_adapt_module_t * module) +static void adapt_module_construct(mca_coll_adapt_module_t * module) { - module->enabled = false; - module->adapt_component = &mca_coll_adapt_component; + module->adapt_enabled = false; } /* * Module destructor */ -static void mca_coll_adapt_module_destruct(mca_coll_adapt_module_t * module) +static void 
adapt_module_destruct(mca_coll_adapt_module_t * module) { - module->enabled = false; + module->adapt_enabled = false; } OBJ_CLASS_INSTANCE(mca_coll_adapt_module_t, - mca_coll_base_module_t, - mca_coll_adapt_module_construct, mca_coll_adapt_module_destruct); + mca_coll_base_module_t, + adapt_module_construct, + adapt_module_destruct); + +/* + * Init module on the communicator + */ +static int adapt_module_enable(mca_coll_base_module_t * module, + struct ompi_communicator_t *comm) +{ + return OMPI_SUCCESS; +} /* * Initial query function that is invoked during MPI_INIT, allowing @@ -77,34 +84,37 @@ OBJ_CLASS_INSTANCE(mca_coll_adapt_module_t, * required level of thread support. This function is invoked exactly * once. */ -int mca_coll_adapt_init_query(bool enable_progress_threads, bool enable_mpi_threads) +int ompi_coll_adapt_init_query(bool enable_progress_threads, bool enable_mpi_threads) { return OMPI_SUCCESS; } - /* * Invoked when there's a new communicator that has been created. * Look at the communicator and decide which set of functions and * priority we want to return. */ -mca_coll_base_module_t *mca_coll_adapt_comm_query(struct ompi_communicator_t * comm, int *priority) +mca_coll_base_module_t *ompi_coll_adapt_comm_query(struct ompi_communicator_t * comm, + int *priority) { mca_coll_adapt_module_t *adapt_module; /* If we're intercomm, or if there's only one process in the communicator */ if (OMPI_COMM_IS_INTER(comm) || 1 == ompi_comm_size(comm)) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:adapt:comm_query (%d/%s): intercomm, comm is too small; disqualifying myself", + "coll:adapt:comm_query (%d/%s): intercomm, " + "comm is too small; disqualifying myself", comm->c_contextid, comm->c_name); return NULL; } - /* Get the priority level attached to this module. If priority is less than or equal to 0, then the module is unavailable. */ + /* Get the priority level attached to this module. 
+ If priority is less than or equal to 0, then the module is unavailable. */ *priority = mca_coll_adapt_component.adapt_priority; if (mca_coll_adapt_component.adapt_priority <= 0) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:adapt:comm_query (%d/%s): priority too low; disqualifying myself", + "coll:adapt:comm_query (%d/%s): priority too low; " + "disqualifying myself", comm->c_contextid, comm->c_name); return NULL; } @@ -123,17 +133,17 @@ mca_coll_base_module_t *mca_coll_adapt_comm_query(struct ompi_communicator_t * c adapt_module->super.coll_alltoall = NULL; adapt_module->super.coll_alltoallw = NULL; adapt_module->super.coll_barrier = NULL; - adapt_module->super.coll_bcast = mca_coll_adapt_bcast; + adapt_module->super.coll_bcast = ompi_coll_adapt_bcast; adapt_module->super.coll_exscan = NULL; adapt_module->super.coll_gather = NULL; adapt_module->super.coll_gatherv = NULL; - adapt_module->super.coll_reduce = mca_coll_adapt_reduce; + adapt_module->super.coll_reduce = ompi_coll_adapt_reduce; adapt_module->super.coll_reduce_scatter = NULL; adapt_module->super.coll_scan = NULL; adapt_module->super.coll_scatter = NULL; adapt_module->super.coll_scatterv = NULL; - adapt_module->super.coll_ibcast = mca_coll_adapt_ibcast; - adapt_module->super.coll_ireduce = mca_coll_adapt_ireduce; + adapt_module->super.coll_ibcast = ompi_coll_adapt_ibcast; + adapt_module->super.coll_ireduce = ompi_coll_adapt_ireduce; adapt_module->super.coll_iallreduce = NULL; opal_output_verbose(10, ompi_coll_base_framework.framework_output, @@ -143,17 +153,9 @@ mca_coll_base_module_t *mca_coll_adapt_comm_query(struct ompi_communicator_t * c } /* - * Init module on the communicator - */ -static int adapt_module_enable(mca_coll_base_module_t * module, struct ompi_communicator_t *comm) -{ - return OMPI_SUCCESS; -} - -/* - * Free ADAPT request + * Free ADAPT request */ -int adapt_request_free(ompi_request_t ** request) +int ompi_coll_adapt_request_free(ompi_request_t ** 
request) { (*request)->req_state = OMPI_REQUEST_INVALID; OBJ_RELEASE(*request); diff --git a/ompi/mca/coll/adapt/coll_adapt_reduce.c b/ompi/mca/coll/adapt/coll_adapt_reduce.c index f41afe21484..e45bb3478a9 100644 --- a/ompi/mca/coll/adapt/coll_adapt_reduce.c +++ b/ompi/mca/coll/adapt/coll_adapt_reduce.c @@ -13,7 +13,7 @@ #include "coll_adapt_algorithms.h" /* MPI_Reduce and MPI_Ireduce in the ADAPT module only work for commutative operations */ -int mca_coll_adapt_reduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, +int ompi_coll_adapt_reduce(const void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t * module) { @@ -22,7 +22,7 @@ int mca_coll_adapt_reduce(const void *sbuf, void *rbuf, int count, struct ompi_d } else { ompi_request_t *request; int err = - mca_coll_adapt_ireduce(sbuf, rbuf, count, dtype, op, root, comm, &request, module); + ompi_coll_adapt_ireduce(sbuf, rbuf, count, dtype, op, root, comm, &request, module); ompi_request_wait(&request, MPI_STATUS_IGNORE); return err; }