From 3d1ef9ce6b4ecc0f4170a20650df4837806a8942 Mon Sep 17 00:00:00 2001
From: Joseph Schuchart
Date: Wed, 19 Jun 2024 19:24:05 -0400
Subject: [PATCH] op/cuda: Lazily initialize the CUDA information

cuda_component_init_query() now only returns OMPI_SUCCESS. The setup
code that used to form its body becomes ompi_op_cuda_lazy_init(), which
is declared in op_cuda.h and called at the top of device_op_pre() in
op_cuda_functions.c. cuda_component_op_query() is moved above the new
function without modification.

Signed-off-by: Joseph Schuchart
---
Reviewer notes (text here is ignored by git am):
 - ompi_op_cuda_lazy_init() is declared and defined with (void) so that
   it is a real prototype in C rather than an unspecified-argument
   declaration.
 - op_cuda_component.c keeps its trailing newline.
 - NOTE(review): device_op_pre() calls ompi_op_cuda_lazy_init() on every
   invocation, and no run-once guard is visible in these hunks. Please
   confirm the unchanged body between the two op_cuda_component.c hunks
   returns early once initialized; otherwise the setup is repeated for
   every operation.
 - Line breaks and indentation were reconstructed from a
   whitespace-collapsed copy. If context does not match, apply with
   `git apply --ignore-whitespace`, and regenerate the patch (the
   post-image blob ids on the index lines predate the touch-ups above).

 ompi/mca/op/cuda/op_cuda.h           |  2 +
 ompi/mca/op/cuda/op_cuda_component.c | 58 +++++++++++++++-------------
 ompi/mca/op/cuda/op_cuda_functions.c |  2 +
 3 files changed, 35 insertions(+), 27 deletions(-)

diff --git a/ompi/mca/op/cuda/op_cuda.h b/ompi/mca/op/cuda/op_cuda.h
index 11417b28550..a88fb49c0ef 100644
--- a/ompi/mca/op/cuda/op_cuda.h
+++ b/ompi/mca/op/cuda/op_cuda.h
@@ -75,6 +75,8 @@ ompi_op_base_stream_handler_fn_t ompi_op_cuda_functions[OMPI_OP_BASE_FORTRAN_OP_
 extern
 ompi_op_base_3buff_stream_handler_fn_t ompi_op_cuda_3buff_functions[OMPI_OP_BASE_FORTRAN_OP_MAX][OMPI_OP_BASE_TYPE_MAX];
 
+void ompi_op_cuda_lazy_init(void);
+
 END_C_DECLS
 
 #endif /* MCA_OP_CUDA_EXPORT_H */
diff --git a/ompi/mca/op/cuda/op_cuda_component.c b/ompi/mca/op/cuda/op_cuda_component.c
index 3ead710bd1d..a3b87c4941c 100644
--- a/ompi/mca/op/cuda/op_cuda_component.c
+++ b/ompi/mca/op/cuda/op_cuda_component.c
@@ -127,6 +127,36 @@ cuda_component_register(void)
 static int
 cuda_component_init_query(bool enable_progress_threads,
                           bool enable_mpi_thread_multiple)
+{
+    return OMPI_SUCCESS;
+}
+
+/*
+ * Query whether this component can be used for a specific op
+ */
+static struct ompi_op_base_module_1_0_0_t*
+cuda_component_op_query(struct ompi_op_t *op, int *priority)
+{
+    ompi_op_base_module_t *module = NULL;
+
+    module = OBJ_NEW(ompi_op_base_module_t);
+    module->opm_device_enabled = true;
+    for (int i = 0; i < OMPI_OP_BASE_TYPE_MAX; ++i) {
+        module->opm_stream_fns[i] = ompi_op_cuda_functions[op->o_f_to_c_index][i];
+        module->opm_3buff_stream_fns[i] = ompi_op_cuda_3buff_functions[op->o_f_to_c_index][i];
+
+        if( NULL != module->opm_fns[i] ) {
+            OBJ_RETAIN(module);
+        }
+        if( NULL != module->opm_3buff_fns[i] ) {
+            OBJ_RETAIN(module);
+        }
+    }
+    *priority = 50;
+    return (ompi_op_base_module_1_0_0_t *) module;
+}
+
+void ompi_op_cuda_lazy_init(void)
 {
     int num_devices;
     int rc;
@@ -166,30 +196,4 @@ cuda_component_init_query(bool enable_progress_threads,
         }
     }
 
-    return OMPI_SUCCESS;
-}
-
-/*
- * Query whether this component can be used for a specific op
- */
-static struct ompi_op_base_module_1_0_0_t*
-cuda_component_op_query(struct ompi_op_t *op, int *priority)
-{
-    ompi_op_base_module_t *module = NULL;
-
-    module = OBJ_NEW(ompi_op_base_module_t);
-    module->opm_device_enabled = true;
-    for (int i = 0; i < OMPI_OP_BASE_TYPE_MAX; ++i) {
-        module->opm_stream_fns[i] = ompi_op_cuda_functions[op->o_f_to_c_index][i];
-        module->opm_3buff_stream_fns[i] = ompi_op_cuda_3buff_functions[op->o_f_to_c_index][i];
-
-        if( NULL != module->opm_fns[i] ) {
-            OBJ_RETAIN(module);
-        }
-        if( NULL != module->opm_3buff_fns[i] ) {
-            OBJ_RETAIN(module);
-        }
-    }
-    *priority = 50;
-    return (ompi_op_base_module_1_0_0_t *) module;
-}
+}
diff --git a/ompi/mca/op/cuda/op_cuda_functions.c b/ompi/mca/op/cuda/op_cuda_functions.c
index 904595147cb..27361cee6a3 100644
--- a/ompi/mca/op/cuda/op_cuda_functions.c
+++ b/ompi/mca/op/cuda/op_cuda_functions.c
@@ -55,6 +55,8 @@ static inline void device_op_pre(const void *orig_source1,
     uint64_t target_flags = -1, source1_flags = -1, source2_flags = -1;
     int target_rc, source1_rc, source2_rc = -1;
 
+    ompi_op_cuda_lazy_init();
+
     *target = orig_target;
     *source1 = (void*)orig_source1;
     if (NULL != orig_source2) {