Skip to content

Commit

Permalink
prov/peer: Introducing peer "provider" with peer_cq to begin with
Browse files Browse the repository at this point in the history
include/ofi_peer.h file along with prov/peer directory defines space
where peer-related common code is placed.
peer_cq is the first structure that has been defined. It is based on
coll_cq structure. coll provider uses peer_cq instead of removed coll_cq.

See man/fi_peer.3.md

Signed-off-by: Tomasz Gromadzki <[email protected]>
  • Loading branch information
grom72 committed Dec 2, 2022
1 parent 2f2fe6b commit 0295e33
Show file tree
Hide file tree
Showing 7 changed files with 152 additions and 32 deletions.
4 changes: 3 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,12 @@ common_srcs = \
prov/util/src/rocr_mem_monitor.c \
prov/util/src/ze_mem_monitor.c \
prov/util/src/cuda_ipc_monitor.c \
prov/peer/src/peer_cq.c \
prov/peer/src/peer.h \
prov/coll/src/coll_attr.c \
prov/coll/src/coll_av.c \
prov/coll/src/coll_av_set.c \
prov/coll/src/coll_coll.c \
prov/coll/src/coll_cq.c \
prov/coll/src/coll_domain.c \
prov/coll/src/coll_ep.c \
prov/coll/src/coll_eq.c \
Expand Down Expand Up @@ -189,6 +190,7 @@ src_libfabric_la_SOURCES = \
include/ofi_net.h \
include/ofi_perf.h \
include/ofi_coll.h \
include/ofi_peer.h \
include/fasthash.h \
include/rbtree.h \
include/uthash.h \
Expand Down
47 changes: 47 additions & 0 deletions include/ofi_peer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Copyright (c) 2022 Intel Corporation, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

#ifndef _OFI_PEER_H_
#define _OFI_PEER_H_

#include <rdma/fi_collective.h>

#include <ofi_util.h>

int ofi_peer_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
struct fid_cq **cq_fid, void *context);

ssize_t ofi_peer_cq_write(struct fid_cq *cq, void *context, uint64_t flags,
size_t len, void *buf, uint64_t data, uint64_t tag,
fi_addr_t src);

#endif /* _OFI_PEER_H_ */
7 changes: 2 additions & 5 deletions prov/coll/src/coll.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@
#include <ofi_hmem.h>
#include <ofi_prov.h>
#include <ofi_atomic.h>
#include <ofi_peer.h>
#include <ofi_coll.h>

#define COLL_IOV_LIMIT 4
#define COLL_MR_MODES (OFI_MR_BASIC_MAP | FI_MR_LOCAL)
Expand All @@ -81,11 +83,6 @@ struct coll_av {
struct fid_peer_av *peer_av;
};

struct coll_cq {
struct util_cq util_cq;
struct fid_peer_cq *peer_cq;
};

struct coll_eq {
struct util_eq util_eq;
struct fid_eq *peer_eq;
Expand Down
10 changes: 4 additions & 6 deletions prov/coll/src/coll_coll.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
*/

#include "coll.h"
#include "ofi_coll.h"

static uint64_t coll_form_tag(uint32_t coll_id, uint32_t rank)
{
Expand Down Expand Up @@ -721,13 +720,12 @@ void coll_join_comp(struct util_coll_operation *coll_op)
void coll_collective_comp(struct util_coll_operation *coll_op)
{
struct coll_ep *ep;
struct coll_cq *cq;

int ret;
ep = container_of(coll_op->ep, struct coll_ep, util_ep.ep_fid);
cq = container_of(ep->util_ep.tx_cq, struct coll_cq, util_cq);

if (cq->peer_cq->owner_ops->write(cq->peer_cq, coll_op->context,
FI_COLLECTIVE, 0, 0, 0, 0, 0))
ret = ofi_peer_cq_write(&ep->util_ep.tx_cq->cq_fid, coll_op->context,
FI_COLLECTIVE, 0, 0, 0, 0, 0);
if (ret)
FI_WARN(ep->util_ep.domain->fabric->prov, FI_LOG_DOMAIN,
"collective - cq write failed\n");

Expand Down
2 changes: 1 addition & 1 deletion prov/coll/src/coll_domain.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
static struct fi_ops_domain coll_domain_ops = {
.size = sizeof(struct fi_ops_domain),
.av_open = coll_av_open,
.cq_open = coll_cq_open,
.cq_open = ofi_peer_cq_open,
.endpoint = coll_endpoint,
.scalable_ep = fi_no_scalable_ep,
.cntr_open = fi_no_cntr_open,
Expand Down
51 changes: 51 additions & 0 deletions prov/peer/src/peer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@

/*
* Copyright (c) 2022 Intel Corporation, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

#ifndef _PEER_H_
#define _PEER_H_

#if HAVE_CONFIG_H
#include <config.h>
#endif

#include <ofi.h>
#include <ofi_util.h>
#include <ofi_peer.h>

struct peer_cq {
struct util_cq util_cq;
struct fid_peer_cq *peer_cq;
};

#endif /* _PEER_H_ */

63 changes: 44 additions & 19 deletions prov/coll/src/coll_cq.c → prov/peer/src/peer_cq.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@
* SOFTWARE.
*/

#include "coll.h"
#include "peer.h"

static int coll_cq_close(struct fid *fid)
static int peer_cq_close(struct fid *fid)
{
struct coll_cq *cq;
struct peer_cq *cq;
int ret;

cq = container_of(fid, struct coll_cq, util_cq.cq_fid.fid);
cq = container_of(fid, struct peer_cq, util_cq.cq_fid.fid);

ret = ofi_cq_cleanup(&cq->util_cq);
if (ret)
Expand All @@ -47,15 +47,15 @@ static int coll_cq_close(struct fid *fid)
return 0;
}

static struct fi_ops coll_cq_fi_ops = {
static struct fi_ops peer_cq_fi_ops = {
.size = sizeof(struct fi_ops),
.close = coll_cq_close,
.close = peer_cq_close,
.bind = fi_no_bind,
.control = fi_no_control,
.ops_open = fi_no_ops_open,
};

static struct fi_ops_cq coll_cq_ops = {
static struct fi_ops_cq peer_cq_ops = {
.size = sizeof(struct fi_ops_cq),
.read = fi_no_cq_read,
.readfrom = fi_no_cq_readfrom,
Expand All @@ -66,21 +66,27 @@ static struct fi_ops_cq coll_cq_ops = {
.strerror = fi_no_cq_strerror,
};

int coll_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
struct fid_cq **cq_fid, void *context)
int peer_cq_init(struct fid_domain *domain, struct fi_cq_attr *attr,
struct fid_cq **cq_fid, struct fi_peer_cq_context *peer_context)
{
struct coll_cq *cq;
struct fi_peer_cq_context *peer_context = context;
struct peer_cq *cq;
int ret;

const struct util_domain *util_domain;
const struct fi_provider* provider;

util_domain = container_of(domain, struct util_domain, domain_fid.fid);
provider = util_domain->fabric->prov;


if (!attr || !(attr->flags & FI_PEER)) {
FI_WARN(&coll_prov, FI_LOG_CORE, "FI_PEER flag required\n");
return -EINVAL;
FI_WARN(provider, FI_LOG_CORE, "FI_PEER flag required\n");
return -FI_EINVAL;
}

if (!peer_context || peer_context->size < sizeof(*peer_context)) {
FI_WARN(&coll_prov, FI_LOG_CORE, "invalid peer CQ context\n");
return -EINVAL;
FI_WARN(provider, FI_LOG_CORE, "invalid peer CQ context\n");
return -FI_EINVAL;
}

cq = calloc(1, sizeof(*cq));
Expand All @@ -89,17 +95,36 @@ int coll_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,

cq->peer_cq = peer_context->cq;

ret = ofi_cq_init(&coll_prov, domain, attr, &cq->util_cq, &ofi_cq_progress,
context);
ret = ofi_cq_init(provider, domain, attr, &cq->util_cq,
&ofi_cq_progress, NULL);
if (ret)
goto err;

*cq_fid = &cq->util_cq.cq_fid;
(*cq_fid)->fid.ops = &coll_cq_fi_ops;
(*cq_fid)->ops = &coll_cq_ops;
(*cq_fid)->fid.ops = &peer_cq_fi_ops;
(*cq_fid)->ops = &peer_cq_ops;
return 0;

err:
free(cq);
return ret;
}

int ofi_peer_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
struct fid_cq **cq_fid, void *context)
{
struct fi_peer_cq_context *peer_context = context;
return peer_cq_init(domain, attr, cq_fid, peer_context);
}

ssize_t ofi_peer_cq_write(struct fid_cq *cq_fid, void *context, uint64_t flags,
size_t len, void *buf, uint64_t data, uint64_t tag,
fi_addr_t src)
{
struct peer_cq *cq;

cq = container_of(cq_fid, struct peer_cq, util_cq.cq_fid);

return cq->peer_cq->owner_ops->write(cq->peer_cq, context,
flags, len, buf, data, tag, src);
}

0 comments on commit 0295e33

Please sign in to comment.