Merge pull request torvalds#184 from hkchu/offload
lkl: Add offload (TSO4, CSUM) support to LKL device, #2 of 2
Octavian Purdila authored Aug 3, 2016
2 parents 59c55ff + dd9bbf6 commit bafaea1
Showing 4 changed files with 173 additions and 48 deletions.
96 changes: 79 additions & 17 deletions tools/lkl/lib/virtio.c
@@ -1,5 +1,6 @@
#include <string.h>
#include <stdio.h>
#include <stdbool.h>
#include <lkl_host.h>
#include <lkl/linux/virtio_ring.h>
#include "iomem.h"
@@ -70,10 +71,32 @@ void virtio_req_complete(struct virtio_req *req, uint32_t len)
struct virtio_queue *q = req->q;
struct virtio_dev *dev = req->dev;
uint16_t idx = le16toh(q->used->idx) & (q->num - 1);
uint16_t new = le16toh(q->used->idx) + 1;
uint16_t new;
int send_irq = 0;
int avail_used;

q->used->ring[idx].id = htole16(req->idx);
if (req->mergeable_rx_len == 0) {
new = le16toh(q->used->idx) + 1;
avail_used = 1;
} else {
/* we've potentially used up multiple (non-chained)
* descriptors and have to create one "used" entry for
* each descr we've consumed.
*/
int i = 0, last_idx = q->last_avail_idx, req_idx;

avail_used = req->buf_count;
new = le16toh(q->used->idx) + req->buf_count;
while (i < req->buf_count-1) {
q->used->ring[idx].len = htole16(req->buf[i].len);
len -= req->buf[i].len;
idx++; i++; last_idx++;
idx &= (q->num - 1);
req_idx = q->avail->ring[last_idx & (q->num - 1)];
q->used->ring[idx].id = htole16(req_idx);
}
}
q->used->ring[idx].len = htole16(len);
/* Make sure all memory writes before are visible to the driver before
* updating the idx.
@@ -86,9 +109,9 @@ void virtio_req_complete(struct virtio_req *req, uint32_t len)

/* Triggers the irq whenever there is no available buffer.
* q->last_avail_idx is incremented after calling virtio_req_complete(),
* so here we need to add one to it.
* so here we need to add avail_used to it.
*/
if (q->last_avail_idx + 1 == q->avail->idx)
if (q->last_avail_idx + avail_used == q->avail->idx)
send_irq = 1;

/* There are two rings: q->avail and q->used for each of the rx and tx
@@ -148,35 +171,68 @@ static void init_dev_buf_from_vring_desc(struct lkl_dev_buf *buf,
bad_driver("bad vring_desc\n");
}

/*
* Below there are two distinctly different (per packet) buffer allocation
* schemes for us to deal with:
*
* 1. One or more descriptors chained through "next" as indicated by the
* LKL_VRING_DESC_F_NEXT flag,
* 2. One or more descriptors from the ring sequentially, as many as are
* available and needed. This is the RX only "mergeable_rx_bufs" mode.
* The mode is entered when the VIRTIO_NET_F_MRG_RXBUF device feature
* is enabled.
*/
static int virtio_process_one(struct virtio_dev *dev, struct virtio_queue *q,
int idx)
int idx, bool is_mergeable_rx)
{
int q_buf_cnt = 0, ret = -1;
struct virtio_req req = {
.dev = dev,
.q = q,
.idx = q->avail->ring[idx & (q->num - 1)],
.mergeable_rx_len = 0,
};
uint16_t prev_flags = LKL_VRING_DESC_F_NEXT;
struct lkl_vring_desc *curr_vring_desc = vring_desc_at_le_idx(q, req.idx);

while ((prev_flags & LKL_VRING_DESC_F_NEXT) &&
(q_buf_cnt < VIRTIO_REQ_MAX_BUFS)) {
prev_flags = le16toh(curr_vring_desc->flags);
init_dev_buf_from_vring_desc(&req.buf[q_buf_cnt++], curr_vring_desc);
curr_vring_desc = vring_desc_at_le_idx(q, curr_vring_desc->next);
}

/* Somehow, we've built a request that's too long to fit onto our device */
if (q_buf_cnt == VIRTIO_REQ_MAX_BUFS &&
(prev_flags & LKL_VRING_DESC_F_NEXT))
bad_driver("enqueued too many request bufs");
if (is_mergeable_rx) {
int len = 0, desc_idx;

/* We may receive up to a 64KB TSO packet so collect as many
* descriptors as are available, up to 64KB in total length.
*/
while ((len < 65535) && (q_buf_cnt < VIRTIO_REQ_MAX_BUFS)) {
init_dev_buf_from_vring_desc(
&req.buf[q_buf_cnt], curr_vring_desc);
len += req.buf[q_buf_cnt++].len;
if (++idx == le16toh(q->avail->idx))
break;
desc_idx = q->avail->ring[idx & (q->num - 1)];
curr_vring_desc = vring_desc_at_le_idx(q, desc_idx);
}
req.mergeable_rx_len = len;
} else {
while ((prev_flags & LKL_VRING_DESC_F_NEXT) &&
(q_buf_cnt < VIRTIO_REQ_MAX_BUFS)) {
prev_flags = le16toh(curr_vring_desc->flags);
init_dev_buf_from_vring_desc(
&req.buf[q_buf_cnt++], curr_vring_desc);
curr_vring_desc =
vring_desc_at_le_idx(q, curr_vring_desc->next);
}
/* Somehow we've built a request too long to fit our device */
if (q_buf_cnt == VIRTIO_REQ_MAX_BUFS &&
(prev_flags & LKL_VRING_DESC_F_NEXT))
bad_driver("enqueued too many request bufs");
}
req.buf_count = q_buf_cnt;
ret = dev->ops->enqueue(dev, &req);
if (ret < 0)
return ret;
q->last_avail_idx++;
if (is_mergeable_rx)
q->last_avail_idx += ret;
else
q->last_avail_idx++;
return 0;
}
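
A minimal standalone sketch of the collection rule in the mergeable RX branch of virtio_process_one() above: keep taking whole descriptors from the avail ring until roughly 64KB of space or VIRTIO_REQ_MAX_BUFS slots have been gathered. The helper and buffer sizes below are made up for illustration and use plain local types, not the real struct virtio_queue:

#include <stdio.h>
#include <stdint.h>

#define MAX_BUFS 19	/* stands in for VIRTIO_REQ_MAX_BUFS */

/* hypothetical helper: how many avail-ring buffers one merged receive
 * would consume, and how much space they add up to */
static int collect_mergeable(const uint32_t *buf_len, int avail,
			     uint32_t *total)
{
	int used = 0;
	uint32_t len = 0;

	while (len < 65535 && used < MAX_BUFS && used < avail)
		len += buf_len[used++];

	*total = len;
	return used;
}

int main(void)
{
	uint32_t lens[32], total;
	int i, n;

	/* page-sized RX buffers, as a guest driver would typically post */
	for (i = 0; i < 32; i++)
		lens[i] = 4096;

	n = collect_mergeable(lens, 32, &total);
	/* with 4KB buffers this gathers 16 descriptors = 64KB of space,
	 * enough for a maximally sized TSO packet */
	printf("consumed %d descriptors, %u bytes of space\n",
	       n, (unsigned)total);
	return 0;
}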

@@ -200,18 +256,24 @@ static int virtio_process_one(struct virtio_dev *dev, struct virtio_queue *q,
void virtio_process_queue(struct virtio_dev *dev, uint32_t qidx)
{
struct virtio_queue *q = &dev->queue[qidx];
bool is_mergeable_rx;

if (!q->ready)
return;

if (dev->ops->acquire_queue)
dev->ops->acquire_queue(dev, qidx);

is_mergeable_rx = ((dev->device_id == LKL_VIRTIO_ID_NET) &&
is_rx_queue(dev, q) &&
(dev->device_features & BIT(LKL_VIRTIO_NET_F_MRG_RXBUF)));

while (q->last_avail_idx != le16toh(q->avail->idx)) {
/* Make sure the following loads happen after loading q->avail->idx.
*/
__sync_synchronize();
if (virtio_process_one(dev, q, q->last_avail_idx) < 0)
if (virtio_process_one(dev, q, q->last_avail_idx,
is_mergeable_rx) < 0)
break;
if (q->last_avail_idx == le16toh(q->avail->idx))
virtio_set_avail_event(q, q->avail->idx);
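
The used-ring accounting in virtio_req_complete() above is easiest to follow with concrete numbers. The sketch below uses plain arrays, host byte order and made-up sizes rather than the real vring structures: every consumed descriptor gets its own used entry, each but the last reporting the buffer's full size, and the last one carrying whatever length remains.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t buf_len[4] = { 4096, 4096, 4096, 4096 };	/* descriptor sizes */
	uint32_t used_len[4];
	uint32_t len = 9000;	/* total bytes of one merged packet */
	int buf_count = 3;	/* descriptors actually consumed */
	int i;

	/* mirror of the mergeable branch: all but the last used entry
	 * report the full buffer size... */
	for (i = 0; i < buf_count - 1; i++) {
		used_len[i] = buf_len[i];
		len -= buf_len[i];
	}
	/* ...and the last entry reports the remainder (9000 - 8192 = 808) */
	used_len[buf_count - 1] = len;

	for (i = 0; i < buf_count; i++)
		printf("used[%d].len = %u\n", i, (unsigned)used_len[i]);
	return 0;
}
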
32 changes: 31 additions & 1 deletion tools/lkl/lib/virtio.h
@@ -4,14 +4,28 @@
#include <stdint.h>
#include <lkl_host.h>

#define VIRTIO_REQ_MAX_BUFS 4
#define PAGE_SIZE 4096

/* The following are copied from skbuff.h */
#if (65536/PAGE_SIZE + 1) < 16
#define MAX_SKB_FRAGS 16UL
#else
#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1)
#endif

#define VIRTIO_REQ_MAX_BUFS (MAX_SKB_FRAGS + 2)

/* We always have 2 queues on a netdev: one for tx, one for rx. */
#define RX_QUEUE_IDX 0
#define TX_QUEUE_IDX 1

struct virtio_req {
struct virtio_dev *dev;
struct virtio_queue *q;
uint16_t idx;
uint16_t buf_count;
struct lkl_dev_buf buf[VIRTIO_REQ_MAX_BUFS];
uint32_t mergeable_rx_len;
};

struct virtio_dev_ops {
@@ -21,6 +35,9 @@ struct virtio_dev_ops {
* the current request is not consumed from the queue and the host
* device is responsible for restarting the queue processing by calling
* virtio_process_queue at a later time.
* A special case exists if a netdev is in mergeable RX buffer mode
* where more than one "avail" slot may be consumed. In this case
* it returns the number of avail entries to advance.
*/
int (*enqueue)(struct virtio_dev *dev, struct virtio_req *req);
/* Acquire/release a lock on the specified queue. Only
@@ -71,4 +88,17 @@ void virtio_process_queue(struct virtio_dev *dev, uint32_t qidx);
#define container_of(ptr, type, member) \
(type *)((char *)(ptr) - __builtin_offsetof(type, member))


static inline int is_rx_queue(struct virtio_dev *dev,
struct virtio_queue *queue)
{
return &dev->queue[RX_QUEUE_IDX] == queue;
}

static inline int is_tx_queue(struct virtio_dev *dev,
struct virtio_queue *queue)
{
return &dev->queue[TX_QUEUE_IDX] == queue;
}

#endif /* _LKL_LIB_VIRTIO_H */
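
With the 4096-byte PAGE_SIZE defined above, 65536/PAGE_SIZE + 1 evaluates to 17, so the #else branch applies and VIRTIO_REQ_MAX_BUFS becomes 17 + 2 = 19: enough slots for a 64KB payload split into page-sized fragments plus two extra entries (presumably the vnet header buffer and a spare). A quick compile-time check of that arithmetic, written as a standalone C11 sketch rather than part of the header:

#include <assert.h>

#define PAGE_SIZE 4096
#if (65536/PAGE_SIZE + 1) < 16
#define MAX_SKB_FRAGS 16UL
#else
#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1)
#endif
#define VIRTIO_REQ_MAX_BUFS (MAX_SKB_FRAGS + 2)

/* 65536/4096 + 1 = 17 fragments, plus 2 extra request slots = 19 */
static_assert(VIRTIO_REQ_MAX_BUFS == 19, "expected 19 request buffers");

int main(void) { return 0; }
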
86 changes: 57 additions & 29 deletions tools/lkl/lib/virtio_net.c
@@ -8,11 +8,8 @@
#define netdev_of(x) (container_of(x, struct virtio_net_dev, dev))
#define BIT(x) (1ULL << x)

/* We always have 2 queues on a netdev: one for tx, one for rx. */
#define RX_QUEUE_IDX 0
#define TX_QUEUE_IDX 1
#define NUM_QUEUES (TX_QUEUE_IDX + 1)
#define QUEUE_DEPTH 32
#define QUEUE_DEPTH 128

/* In fact, we'll hit the limit on the devs string below long before
* we hit this, but it's good enough for now. */
@@ -59,51 +56,82 @@ static void net_release_queue(struct virtio_dev *dev, int queue_idx)
lkl_host_ops.mutex_unlock(netdev_of(dev)->queue_locks[queue_idx]);
}

static inline int is_rx_queue(struct virtio_dev *dev, struct virtio_queue *queue)
{
return &dev->queue[RX_QUEUE_IDX] == queue;
}

static inline int is_tx_queue(struct virtio_dev *dev, struct virtio_queue *queue)
{
return &dev->queue[TX_QUEUE_IDX] == queue;
}

/* The buffers passed through "req" from the virtio_net driver always
* start with a vnet_hdr. We need to check whether the backend device
* expects a vnet_hdr and adjust the buffer offset accordingly.
*/
static int net_enqueue(struct virtio_dev *dev, struct virtio_req *req)
{
struct lkl_virtio_net_hdr_v1 *header;
struct virtio_net_dev *net_dev;
int ret;
struct lkl_dev_buf iov[1];
int ret, len, i;
struct lkl_dev_buf *iov;

header = req->buf[0].addr;
net_dev = netdev_of(dev);
iov[0].len = req->buf[0].len - sizeof(*header);

iov[0].addr = &header[1];

if (!iov[0].len && req->buf_count > 1) {
iov[0].addr = req->buf[1].addr;
iov[0].len = req->buf[1].len;
if (!net_dev->nd->has_vnet_hdr) {
/* The backend device does not expect a vnet_hdr, so adjust
* the buffer accordingly. (We adjust req->buf in place so it
* can be used directly for the tx/rx call, but remember to
* undo the change after the call.)
* Note that it's ok to pass an iov entry with len == 0; the
* caller will simply skip to the next entry.
*/
req->buf[0].addr += sizeof(*header);
req->buf[0].len -= sizeof(*header);
}
iov = req->buf;

/* Pick which virtqueue to send the buffer(s) to */
if (is_tx_queue(dev, req->q)) {
ret = net_dev->ops->tx(net_dev->nd, iov, 1);
ret = net_dev->ops->tx(net_dev->nd, iov, req->buf_count);
if (ret < 0)
return -1;
i = 1;
} else if (is_rx_queue(dev, req->q)) {
header->num_buffers = 1;
ret = net_dev->ops->rx(net_dev->nd, iov, 1);
ret = net_dev->ops->rx(net_dev->nd, iov, req->buf_count);
if (ret < 0)
return -1;
if (net_dev->nd->has_vnet_hdr) {

/* if the number of bytes returned exactly matches
* the total space in the iov then there is a good
* chance we did not supply a large enough buffer for
* the whole pkt, i.e., pkt has been truncated.
* This is only likely to happen under mergeable RX
* buffer mode.
*/
if (req->mergeable_rx_len == (unsigned int)ret)
lkl_printf("PKT is likely truncated! len=%d\n",
ret);
} else {
header->flags = 0;
header->gso_type = LKL_VIRTIO_NET_HDR_GSO_NONE;
}
/* Have to compute how many descriptors we've consumed (really
* only matters in the mergeable RX mode) and return it
* through "num_buffers".
*/
for (i = 0, len = ret; len > 0; i++)
len -= req->buf[i].len;
req->buf_count = header->num_buffers = i;
/* Need to set "buf_count" to how many we really used in
* order for virtio_req_complete() to work.
*/
if (dev->device_features & BIT(LKL_VIRTIO_NET_F_GUEST_CSUM))
header->flags = LKL_VIRTIO_NET_HDR_F_DATA_VALID;
} else {
bad_request("tried to push on non-existent queue");
return -1;
}

virtio_req_complete(req, iov[0].len + sizeof(*header));
return 0;
if (!net_dev->nd->has_vnet_hdr) {
/* Undo the adjustment */
req->buf[0].addr -= sizeof(*header);
req->buf[0].len += sizeof(*header);
ret += sizeof(struct lkl_virtio_net_hdr_v1);
}
virtio_req_complete(req, ret);
return i;
}

static struct virtio_dev_ops net_ops = {
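
The descriptor-count loop in net_enqueue() above walks the request buffers until the byte count returned by the backend rx() call is exhausted; the result is what ends up in header->num_buffers and back in req->buf_count. A standalone sketch of that computation with made-up sizes and plain int arithmetic, not the real req->buf types:

#include <stdio.h>

int main(void)
{
	int buf_len[19] = { 4096, 4096, 4096, 4096, 4096 };
	int ret = 9000;		/* bytes the backend rx() call returned */
	int i, len;

	/* same loop shape as net_enqueue(): consume buffers until the
	 * returned length is used up */
	for (i = 0, len = ret; len > 0; i++)
		len -= buf_len[i];

	/* 9000 bytes spill into a third 4KB buffer, so num_buffers = 3 */
	printf("num_buffers = %d\n", i);
	return 0;
}
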
7 changes: 6 additions & 1 deletion tools/lkl/lib/virtio_net_linux_fdnet.c
@@ -32,6 +32,12 @@ struct lkl_netdev_linux_fdnet_ops lkl_netdev_linux_fdnet_ops = {
#endif /* __NR_eventfd */
};

/* The following tx() and rx() code assumes struct lkl_dev_buf matches
* struct iovec so we can safely cast iov to (struct iovec *). (If
* BUILD_BUG_ON() were supported in LKL, I would have added
*
* "BUILD_BUG_ON(sizeof(struct lkl_dev_buf) != sizeof(struct iovec));")
*/
static int linux_fdnet_net_tx(struct lkl_netdev *nd,
struct lkl_dev_buf *iov, int cnt)
{
@@ -61,7 +67,6 @@ static int linux_fdnet_net_rx(struct lkl_netdev *nd,

if (ret < 0 && errno != EAGAIN)
perror("read from fdnet device fails");

return ret;
}
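
The cast-safety assumption documented at the top of this file's diff can also be expressed as a compile-time check. Below is a sketch using C11 static_assert and a local stand-in for struct lkl_dev_buf (the real definition lives in the LKL headers, so the field names here are assumptions); note that BUILD_BUG_ON() takes the failing condition (sizes differ) while static_assert() takes the passing one (sizes match):

#include <assert.h>
#include <stddef.h>
#include <sys/uio.h>

struct dev_buf_model {		/* stand-in for struct lkl_dev_buf */
	void *addr;
	size_t len;
};

/* the readv()/writev() casts are only safe if the two layouts match */
static_assert(sizeof(struct dev_buf_model) == sizeof(struct iovec),
	      "buffer descriptor must match struct iovec");
static_assert(offsetof(struct dev_buf_model, addr) ==
	      offsetof(struct iovec, iov_base),
	      "address field must line up with iov_base");

int main(void) { return 0; }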

