Skip to content

Commit

Permalink
Merge pull request pmodels#6571 from abrooks98/engine_selection
Browse files Browse the repository at this point in the history
mpl/gpu/ze: add engine selection logic to optimize different copy types (PR #8)

Approved-by: Hui Zhou
Approved-by: Ken Raffenetti
  • Loading branch information
hzhou authored Jul 20, 2023
2 parents 98b84c4 + 9a2664b commit 5d62e24
Show file tree
Hide file tree
Showing 7 changed files with 218 additions and 58 deletions.
5 changes: 1 addition & 4 deletions src/include/mpir_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,7 @@ MPL_STATIC_INLINE_PREFIX int MPIR_GPU_query_pointer_attr(const void *ptr, MPL_po
MPL_STATIC_INLINE_PREFIX bool MPIR_GPU_query_pointer_is_dev(const void *ptr)
{
if (ENABLE_GPU && ptr != NULL) {
MPL_pointer_attr_t attr;
MPL_gpu_query_pointer_attr(ptr, &attr);

return attr.type == MPL_GPU_POINTER_DEV;
return MPL_gpu_query_pointer_is_dev(ptr, NULL);
}

return false;
Expand Down
23 changes: 18 additions & 5 deletions src/mpid/ch4/shm/ipc/src/ipc_p2p.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,18 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_IPCI_copy_data(MPIDI_IPC_hdr * ipc_hdr, MPIR_
goto fn_exit;
}

/*
 * Pick the GPU copy engine for a transfer between two devices.
 *
 * dev1, dev2: device ids of the two endpoints. A value of -1 on either side
 *             short-circuits the decision.
 *
 * Returns MPL_GPU_ENGINE_TYPE_COPY_HIGH_BANDWIDTH when either id is -1 or when
 * MPL_gpu_query_is_same_dev() reports the two ids as the same device;
 * returns MPL_GPU_ENGINE_TYPE_COPY_LOW_LATENCY otherwise.
 */
MPL_STATIC_INLINE_PREFIX MPL_gpu_engine_type_t MPIDI_IPCI_choose_engine(int dev1, int dev2)
{
    const int missing_id = (dev1 == -1) || (dev2 == -1);

    /* Nothing to compare unless both sides carry a device id. */
    if (missing_id) {
        return MPL_GPU_ENGINE_TYPE_COPY_HIGH_BANDWIDTH;
    }

    /* Both ids are known to be set from here on. */
    assert(dev1 != -1 && dev2 != -1);

    return MPL_gpu_query_is_same_dev(dev1, dev2)
        ? MPL_GPU_ENGINE_TYPE_COPY_HIGH_BANDWIDTH : MPL_GPU_ENGINE_TYPE_COPY_LOW_LATENCY;
}

MPL_STATIC_INLINE_PREFIX int MPIDI_IPCI_handle_lmt_recv(MPIDI_IPC_hdr * ipc_hdr,
size_t src_data_sz,
MPIR_Request * sreq_ptr,
Expand Down Expand Up @@ -200,11 +212,12 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_IPCI_handle_lmt_recv(MPIDI_IPC_hdr * ipc_hdr,
MPIR_ERR_CHECK(mpi_errno);
/* copy */
if (ipc_hdr->is_contig && dt_contig) {
mpi_errno = MPIR_Localcopy_gpu(src_buf, src_data_sz, MPI_BYTE, NULL,
MPIDIG_REQUEST(rreq, buffer),
MPIDIG_REQUEST(rreq, count),
MPIDIG_REQUEST(rreq, datatype), &attr,
MPL_GPU_ENGINE_TYPE_COPY_HIGH_BANDWIDTH, true);
MPL_gpu_engine_type_t engine =
MPIDI_IPCI_choose_engine(ipc_hdr->ipc_handle.gpu.global_dev_id, dev_id);
mpi_errno =
MPIR_Localcopy_gpu(src_buf, src_data_sz, MPI_BYTE, NULL,
MPIDIG_REQUEST(rreq, buffer), MPIDIG_REQUEST(rreq, count),
MPIDIG_REQUEST(rreq, datatype), &attr, engine, true);
MPIR_ERR_CHECK(mpi_errno);
} else {
/* TODO: get sender datatype and call MPIR_Typerep_op with mapped_device set to dev_id */
Expand Down
5 changes: 4 additions & 1 deletion src/mpid/ch4/shm/ipc/src/ipc_send.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,10 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_IPCI_try_lmt_isend(const void *buf, MPI_Aint
if (ipc_attr.gpu_attr.type == MPL_GPU_POINTER_DEV) {
mpi_errno = MPIDI_GPU_get_ipc_attr(mem_addr, rank, comm, &ipc_attr);
MPIR_ERR_CHECK(mpi_errno);
} else {
} else if (!MPL_gpu_query_pointer_is_dev(buf, &ipc_attr.gpu_attr)) {
/* The result of MPL_gpu_query_pointer_is_dev is not necessarily equivalent to
* (gpu_attr.type == MPL_GPU_POINTER_DEV) depending on the backend. This explicit check
* ensures the pointer can be accepted by XPMEM and work as intended. */
mpi_errno = MPIDI_XPMEM_get_ipc_attr(mem_addr, mem_size, &ipc_attr);
MPIR_ERR_CHECK(mpi_errno);
}
Expand Down
11 changes: 11 additions & 0 deletions src/mpl/include/mpl_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,21 @@ static inline int MPL_gpu_query_pointer_attr(const void *ptr, MPL_pointer_attr_t
return MPL_SUCCESS;
}

/*
 * Fallback used in the "! MPL_HAVE_GPU" branch of this header: every pointer
 * is reported as non-device. Neither argument is read or written.
 *
 * Returns 0 unconditionally.
 */
static inline int MPL_gpu_query_pointer_is_dev(const void *ptr, MPL_pointer_attr_t * attr)
{
    /* Parameters exist only to match the real backends' signature. */
    (void) ptr;
    (void) attr;

    return 0;
}

/*
 * Fallback used in the "! MPL_HAVE_GPU" branch of this header: two device ids
 * denote the same device exactly when they are numerically equal.
 *
 * Returns 1 if dev1 equals dev2, 0 otherwise.
 */
static inline int MPL_gpu_query_is_same_dev(int dev1, int dev2)
{
    if (dev1 != dev2) {
        return 0;
    }

    return 1;
}
#endif /* ! MPL_HAVE_GPU */

/* NOTE(review): presumably reports the GPU backend in use through *type;
 * the implementation is not visible here - confirm. */
int MPL_gpu_query_support(MPL_gpu_type_t * type);
/* Fill *attr with the attributes of ptr; callers inspect attr->type afterwards.
 * The no-GPU inline variant returns MPL_SUCCESS. */
int MPL_gpu_query_pointer_attr(const void *ptr, MPL_pointer_attr_t * attr);
/* Return non-zero if ptr is a device pointer. In the CUDA and HIP backends
 * attr may be NULL, in which case the attributes are queried internally;
 * otherwise the caller-supplied attr is used as is. Depending on the backend,
 * the result is not necessarily equivalent to
 * (attr->type == MPL_GPU_POINTER_DEV). */
int MPL_gpu_query_pointer_is_dev(const void *ptr, MPL_pointer_attr_t * attr);
/* Return non-zero if dev1 and dev2 refer to the same device. The CUDA and HIP
 * backends and the no-GPU fallback compare the two ids for equality. */
int MPL_gpu_query_is_same_dev(int dev1, int dev2);

/* NOTE(review): presumably creates an IPC handle for ptr and stores it in
 * *mpl_ipc_handle; defined per backend (CUDA and HIP definitions exist) -
 * confirm exact semantics against those implementations. */
int MPL_gpu_ipc_handle_create(const void *ptr, MPL_gpu_device_attr * ptr_attr,
MPL_gpu_ipc_mem_handle_t * mpl_ipc_handle);
Expand Down
16 changes: 16 additions & 0 deletions src/mpl/src/gpu/mpl_gpu_cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,22 @@ int MPL_gpu_query_pointer_attr(const void *ptr, MPL_pointer_attr_t * attr)
goto fn_exit;
}

/*
 * Tell whether ptr refers to device memory (CUDA backend).
 *
 * ptr:  address to classify.
 * attr: attributes previously obtained for ptr, or NULL to have this function
 *       query them itself through MPL_gpu_query_pointer_attr().
 *
 * Returns non-zero when the attribute type is MPL_GPU_POINTER_DEV and 0
 * otherwise. When attr is NULL and the internal query fails, 0 is returned.
 */
int MPL_gpu_query_pointer_is_dev(const void *ptr, MPL_pointer_attr_t * attr)
{
    MPL_pointer_attr_t queried;

    if (attr == NULL) {
        /* Only trust `queried` after a successful query: on failure it may be
         * left unwritten, and reading its type would use an uninitialized
         * value. Treat an unclassifiable pointer as "not device". */
        if (MPL_gpu_query_pointer_attr(ptr, &queried) != MPL_SUCCESS) {
            return 0;
        }
        attr = &queried;
    }

    return attr->type == MPL_GPU_POINTER_DEV;
}

/*
 * CUDA backend: two device ids denote the same device exactly when they are
 * numerically equal.
 *
 * Returns 1 if dev1 equals dev2, 0 otherwise.
 */
int MPL_gpu_query_is_same_dev(int dev1, int dev2)
{
    if (dev1 != dev2) {
        return 0;
    }

    return 1;
}

int MPL_gpu_ipc_handle_create(const void *ptr, MPL_gpu_device_attr * ptr_attr,
MPL_gpu_ipc_mem_handle_t * ipc_handle)
{
Expand Down
16 changes: 16 additions & 0 deletions src/mpl/src/gpu/mpl_gpu_hip.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,22 @@ int MPL_gpu_query_pointer_attr(const void *ptr, MPL_pointer_attr_t * attr)
goto fn_exit;
}

/*
 * Tell whether ptr refers to device memory (HIP backend).
 *
 * ptr:  address to classify.
 * attr: attributes previously obtained for ptr, or NULL to have this function
 *       query them itself through MPL_gpu_query_pointer_attr().
 *
 * Returns non-zero when the attribute type is MPL_GPU_POINTER_DEV and 0
 * otherwise. When attr is NULL and the internal query fails, 0 is returned.
 */
int MPL_gpu_query_pointer_is_dev(const void *ptr, MPL_pointer_attr_t * attr)
{
    MPL_pointer_attr_t queried;

    if (attr == NULL) {
        /* Only trust `queried` after a successful query: on failure it may be
         * left unwritten, and reading its type would use an uninitialized
         * value. Treat an unclassifiable pointer as "not device". */
        if (MPL_gpu_query_pointer_attr(ptr, &queried) != MPL_SUCCESS) {
            return 0;
        }
        attr = &queried;
    }

    return attr->type == MPL_GPU_POINTER_DEV;
}

/*
 * HIP backend: two device ids denote the same device exactly when they are
 * numerically equal.
 *
 * Returns 1 if dev1 equals dev2, 0 otherwise.
 */
int MPL_gpu_query_is_same_dev(int dev1, int dev2)
{
    if (dev1 != dev2) {
        return 0;
    }

    return 1;
}

int MPL_gpu_ipc_handle_create(const void *ptr, MPL_gpu_device_attr * ptr_attr,
MPL_gpu_ipc_mem_handle_t * ipc_handle)
{
Expand Down
Loading

0 comments on commit 5d62e24

Please sign in to comment.