diff --git a/arch/lkl/kernel/setup.c b/arch/lkl/kernel/setup.c index 3821383b52409e..b59b447df4412a 100644 --- a/arch/lkl/kernel/setup.c +++ b/arch/lkl/kernel/setup.c @@ -117,8 +117,6 @@ void machine_restart(char *unused) machine_halt(); } -extern int lkl_netdevs_remove(void); - long lkl_sys_halt(void) { long err; @@ -141,10 +139,8 @@ long lkl_sys_halt(void) lkl_ops->sem_free(init_sem); free_initial_syscall_thread(); - if (lkl_netdevs_remove() == 0) - /* We know that there is nothing else touching our - * memory. */ - free_mem(); + + free_mem(); return 0; } diff --git a/tools/lkl/include/lkl.h b/tools/lkl/include/lkl.h index c0a9f164fa5200..2a791a9c5fb404 100644 --- a/tools/lkl/include/lkl.h +++ b/tools/lkl/include/lkl.h @@ -270,15 +270,14 @@ struct lkl_netdev_args { int lkl_netdev_add(struct lkl_netdev *nd, struct lkl_netdev_args* args); /** -* lkl_netdevs_remove - destroy all network devices +* lkl_netdev_remove - remove a previously added network device * -* Attempts to release all resources held by network devices created +* Attempts to release all resources held by a network device created * via lkl_netdev_add. * -* @returns 0 if all devices are successfully removed, -1 if at least -* one fails. +* @id - the network device id, as returned by @lkl_netdev_add */ -int lkl_netdevs_remove(void); +void lkl_netdev_remove(int id); /** * lkl_netdev_get_ifindex - retrieve the interface index for a given network diff --git a/tools/lkl/include/lkl_host.h b/tools/lkl/include/lkl_host.h index 9a7b5e45f572f2..b845ee2e2c0f45 100644 --- a/tools/lkl/include/lkl_host.h +++ b/tools/lkl/include/lkl_host.h @@ -49,12 +49,12 @@ struct lkl_dev_blk_ops { struct lkl_netdev { struct lkl_dev_net_ops *ops; - lkl_thread_t rx_tid, tx_tid; uint8_t has_vnet_hdr: 1; }; struct lkl_dev_net_ops { - /* Writes a L2 packet into the net device. + /* + * Writes an L2 packet into the net device. * * The data buffer can only hold 0 or 1 complete packets. 
* @@ -62,9 +62,11 @@ struct lkl_dev_net_ops { * @iov - pointer to the buffer vector * @cnt - # of vectors in iov. * @returns number of bytes transmitted - */ + */ int (*tx)(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt); - /* Reads a packet from the net device. + + /* + * Reads a packet from the net device. * * It must only read one complete packet if present. * @@ -75,43 +77,32 @@ struct lkl_dev_net_ops { * @iov - pointer to the buffer vector to store the packet * @cnt - # of vectors in iov. * @returns number of bytes read for success or < 0 if error - */ + */ int (*rx)(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt); + #define LKL_DEV_NET_POLL_RX 1 #define LKL_DEV_NET_POLL_TX 2 - /* Polls a net device. - * - * Supports only one of two events: LKL_DEV_NET_POLL_RX (readable) and - * LKL_DEV_NET_POLL_TX (writable). Blocks until one event is available. - * - * Implementation can assume only one of LKL_DEV_NET_POLL_RX or - * LKL_DEV_NET_POLL_TX is set in @events. +#define LKL_DEV_NET_POLL_HUP 4 + + /* + * Polls a net device. * - * Both LKL_DEV_NET_POLL_RX and LKL_DEV_NET_POLL_TX can be - * level-triggered or edge-triggered. When it's level-triggered, - * rx/tx thread can become a busy waiting loop which burns out CPU. - * This is more of a problem for tx, because LKL_DEV_NET_POLL_TX event - * is present most of the time. + * Supports the following events: LKL_DEV_NET_POLL_RX (readable), + * LKL_DEV_NET_POLL_TX (writable) or LKL_DEV_NET_POLL_HUP (the close + * operations has been issued and we need to clean up). Blocks until one + * event is available. * * @nd - pointer to the network device - * @events - a bit mask specifying the events to poll on. Only one of - * LKL_DEV_NET_POLL_RX or LKL_DEV_NET_POLL_TX is set. - * @returns the events triggered for success. -1 for failure. */ - int (*poll)(struct lkl_netdev *nd, int events); - /* Closes a net device. - * - * Implementation can choose to release any resources releated to it. 
In - * particular, the polling threads are to be killed in this function. - * - * Implemenation must guarantee it's safe to call free_mem() after this - * function call. - * - * Not implemented by all netdev types. + int (*poll)(struct lkl_netdev *nd); + + /* + * Closes a net device. * - * @returns 0 for success. -1 for failure. + * Implementation must release its resources and poll must wakeup and + * return LKL_DEV_NET_POLL_HUP. */ - int (*close)(struct lkl_netdev *nd); + void (*close)(struct lkl_netdev *nd); }; #ifdef __cplusplus diff --git a/tools/lkl/lib/Build b/tools/lkl/lib/Build index d7ab3af59ada57..5fb91262277ebe 100644 --- a/tools/lkl/lib/Build +++ b/tools/lkl/lib/Build @@ -12,7 +12,7 @@ lkl-y += virtio.o lkl-y += dbg.o lkl-y += dbg_handler.o lkl-$(CONFIG_AUTO_LKL_POSIX_HOST) += virtio_net.o -lkl-$(CONFIG_AUTO_LKL_POSIX_HOST) += virtio_net_linux_fdnet.o +lkl-$(CONFIG_AUTO_LKL_POSIX_HOST) += virtio_net_fd.o lkl-$(CONFIG_AUTO_LKL_POSIX_HOST) += virtio_net_tap.o lkl-$(CONFIG_AUTO_LKL_POSIX_HOST) += virtio_net_raw.o lkl-$(CONFIG_AUTO_LKL_POSIX_HOST) += virtio_net_macvtap.o diff --git a/tools/lkl/lib/hijack/hijack.c b/tools/lkl/lib/hijack/hijack.c index d512c033a06e84..afb3c84a1ab345 100644 --- a/tools/lkl/lib/hijack/hijack.c +++ b/tools/lkl/lib/hijack/hijack.c @@ -27,6 +27,7 @@ #include #include "xlate.h" +#include "init.h" static int is_lklfd(int fd) { @@ -75,6 +76,29 @@ static host_call host_calls[__lkl__NR_syscalls]; asm(".global " #name); \ asm(".set " #name "," #name "_hook"); \ +#define HOOK_CALL_USE_HOST_BEFORE_START(name) \ + static void __attribute__((constructor(101))) \ + init_host_##name(void) \ + { \ + host_calls[__lkl__NR_##name] = resolve_sym(#name); \ + } \ + \ + long name##_hook(long p1, long p2, long p3, long p4, long p5, \ + long p6) \ + { \ + long p[6] = {p1, p2, p3, p4, p5, p6 }; \ + \ + if (!host_calls[__lkl__NR_##name]) \ + host_calls[__lkl__NR_##name] = resolve_sym(#name); \ + if (!lkl_running) \ + return 
host_calls[__lkl__NR_##name](p1, p2, p3, \ + p4, p5, p6); \ + \ + return lkl_set_errno(lkl_syscall(__lkl__NR_##name, p)); \ + } \ + asm(".global " #name); \ + asm(".set " #name "," #name "_hook") + #define HOST_CALL(name) \ static long (*host_##name)(); \ static void __attribute__((constructor(101))) \ @@ -131,7 +155,7 @@ HOOK_FD_CALL(read) HOOK_FD_CALL(recvfrom) HOOK_FD_CALL(recv) HOOK_FD_CALL(epoll_wait) -HOOK_CALL(pipe); +HOOK_CALL_USE_HOST_BEFORE_START(pipe); HOST_CALL(setsockopt); int setsockopt(int fd, int level, int optname, const void *optval, @@ -264,7 +288,7 @@ int select(int nfds, fd_set *r, fd_set *w, fd_set *e, struct timeval *t) return lkl_call(__lkl__NR_select, 5, nfds, r, w, e, t); } -HOOK_CALL(epoll_create) +HOOK_CALL_USE_HOST_BEFORE_START(epoll_create); HOST_CALL(epoll_ctl); int epoll_ctl(int epollfd, int op, int fd, struct epoll_event *event) @@ -282,6 +306,12 @@ int epoll_ctl(int epollfd, int op, int fd, struct epoll_event *event) int eventfd(unsigned int count, int flags) { + if (!lkl_running) { + int (*f)(unsigned int, int) = resolve_sym("eventfd"); + + return f(count, flags); + } + return lkl_sys_eventfd2(count, flags); } diff --git a/tools/lkl/lib/hijack/init.c b/tools/lkl/lib/hijack/init.c index 304ae27eb0bf2e..9ca6745f393606 100644 --- a/tools/lkl/lib/hijack/init.c +++ b/tools/lkl/lib/hijack/init.c @@ -25,7 +25,6 @@ #include #include "xlate.h" -#include "../virtio_net_linux_fdnet.h" #define __USE_GNU #include @@ -155,13 +154,6 @@ static void mount_cmds_exec(char *_cmds, int (*callback)(char*)) free(cmds); } -void fixup_netdev_linux_fdnet_ops(void) -{ - /* It's okay if this is NULL, because then netdev close will - * fall back onto an uncloseable implementation. 
*/ - lkl_netdev_linux_fdnet_ops.eventfd = dlsym(RTLD_NEXT, "eventfd"); -} - static void PinToCpus(const cpu_set_t* cpus) { if (sched_setaffinity(0, sizeof(cpu_set_t), cpus)) { @@ -184,12 +176,14 @@ static void PinToFirstCpu(const cpu_set_t* cpus) } } -int lkl_debug; +int lkl_debug, lkl_running; + +static int nd_id = -1; void __attribute__((constructor(102))) hijack_init(void) { - int ret, i, dev_null, nd_id = -1, nd_ifindex = -1; + int ret, i, dev_null, nd_ifindex = -1; /* OBSOLETE: should use IFTYPE and IFPARAMS */ char *tap = getenv("LKL_HIJACK_NET_TAP"); char *iftype = getenv("LKL_HIJACK_NET_IFTYPE"); @@ -261,9 +255,6 @@ hijack_init(void) if (single_cpu_mode == 2) PinToFirstCpu(&ori_cpu); - /* Must be run before lkl_netdev_tap_create */ - fixup_netdev_linux_fdnet_ops(); - if (tap) { fprintf(stderr, "WARN: variable LKL_HIJACK_NET_TAP is now obsoleted.\n" @@ -329,6 +320,8 @@ hijack_init(void) return; } + lkl_running = 1; + /* restore cpu affinity */ if (single_cpu_mode) PinToCpus(&ori_cpu); @@ -440,6 +433,8 @@ hijack_fini(void) for (i = 0; i < LKL_FD_OFFSET; i++) lkl_sys_close(i); + if (nd_id >= 0) + lkl_netdev_remove(nd_id); lkl_sys_halt(); } diff --git a/tools/lkl/lib/hijack/init.h b/tools/lkl/lib/hijack/init.h new file mode 100644 index 00000000000000..2ee19d865d88f6 --- /dev/null +++ b/tools/lkl/lib/hijack/init.h @@ -0,0 +1,6 @@ +#ifndef _LKL_HIJACK_INIT_H +#define _LKL_HIJACK_INIT_H + +extern int lkl_running; + +#endif /*_LKL_HIJACK_INIT_H */ diff --git a/tools/lkl/lib/virtio_net.c b/tools/lkl/lib/virtio_net.c index 1173f4d6046b25..d0a34ae484cb9a 100644 --- a/tools/lkl/lib/virtio_net.c +++ b/tools/lkl/lib/virtio_net.c @@ -24,18 +24,12 @@ #define bad_request(s) lkl_printf("virtio_net: %s\n", s); #endif /* DEBUG */ -struct virtio_net_poll { - struct virtio_net_dev *dev; - int event; -}; - struct virtio_net_dev { struct virtio_dev dev; struct lkl_virtio_net_config config; - struct lkl_dev_net_ops *ops; struct lkl_netdev *nd; - struct virtio_net_poll rx_poll, 
tx_poll; struct lkl_mutex **queue_locks; + lkl_thread_t poll_tid; }; static int net_check_features(struct virtio_dev *dev) @@ -84,12 +78,12 @@ static int net_enqueue(struct virtio_dev *dev, struct virtio_req *req) /* Pick which virtqueue to send the buffer(s) to */ if (is_tx_queue(dev, req->q)) { - ret = net_dev->ops->tx(net_dev->nd, iov, req->buf_count); + ret = net_dev->nd->ops->tx(net_dev->nd, iov, req->buf_count); if (ret < 0) return -1; i = 1; } else if (is_rx_queue(dev, req->q)) { - ret = net_dev->ops->rx(net_dev->nd, iov, req->buf_count); + ret = net_dev->nd->ops->rx(net_dev->nd, iov, req->buf_count); if (ret < 0) return -1; if (net_dev->nd->has_vnet_hdr) { @@ -143,16 +137,23 @@ static struct virtio_dev_ops net_ops = { void poll_thread(void *arg) { - struct virtio_net_poll *np = (struct virtio_net_poll *)arg; - int ret; + struct virtio_net_dev *dev = arg; /* Synchronization is handled in virtio_process_queue */ - while ((ret = np->dev->ops->poll(np->dev->nd, np->event)) >= 0) { + do { + int ret = dev->nd->ops->poll(dev->nd); + + if (ret < 0) { + lkl_printf("virtio net poll error: %d\n", ret); + continue; + } + if (ret & LKL_DEV_NET_POLL_HUP) + break; if (ret & LKL_DEV_NET_POLL_RX) - virtio_process_queue(&np->dev->dev, 0); + virtio_process_queue(&dev->dev, 0); if (ret & LKL_DEV_NET_POLL_TX) - virtio_process_queue(&np->dev->dev, 1); - } + virtio_process_queue(&dev->dev, 1); + } while (1); } struct virtio_net_dev *registered_devs[MAX_NET_DEVS]; @@ -225,19 +226,12 @@ int lkl_netdev_add(struct lkl_netdev *nd, struct lkl_netdev_args* args) dev->dev.config_data = &dev->config; dev->dev.config_len = sizeof(dev->config); dev->dev.ops = &net_ops; - dev->ops = nd->ops; dev->nd = nd; dev->queue_locks = init_queue_locks(NUM_QUEUES); if (!dev->queue_locks) goto out_free; - dev->rx_poll.event = LKL_DEV_NET_POLL_RX; - dev->rx_poll.dev = dev; - - dev->tx_poll.event = LKL_DEV_NET_POLL_TX; - dev->tx_poll.dev = dev; - /* MUST match the number of queue locks we initialized. 
We * could init the queues in virtio_dev_setup to help enforce * this, but netdevs are the only flavor that need these @@ -247,12 +241,8 @@ int lkl_netdev_add(struct lkl_netdev *nd, struct lkl_netdev_args* args) if (ret) goto out_free; - nd->rx_tid = lkl_host_ops.thread_create(poll_thread, &dev->rx_poll); - if (nd->rx_tid == 0) - goto out_cleanup_dev; - - nd->tx_tid = lkl_host_ops.thread_create(poll_thread, &dev->tx_poll); - if (nd->tx_tid == 0) + dev->poll_tid = lkl_host_ops.thread_create(poll_thread, dev); + if (dev->poll_tid == 0) goto out_cleanup_dev; ret = dev_register(dev); @@ -273,38 +263,39 @@ int lkl_netdev_add(struct lkl_netdev *nd, struct lkl_netdev_args* args) } -/* Return 0 for success, -1 for failure. */ -static int lkl_netdev_remove(struct virtio_net_dev *dev) +/* + * Removes the network device registered under @id: brings its interface + * down, closes the backend, waits for the poll thread to exit and frees the + * device. On any error a message is logged and the device is left in place. + */ +void lkl_netdev_remove(int id) { - if (!dev->nd->ops->close) - /* Can't kill the poll threads, so we can't do - * anything safely. */ - return -1; + struct virtio_net_dev *dev; + int ret; - if (dev->nd->ops->close(dev->nd) < 0) - /* Something went wrong */ - return -1; + /* a negative id would index registered_devs[] out of bounds */ + if (id < 0 || id >= registered_dev_idx || !registered_devs[id]) { + lkl_printf("%s: invalid id: %d\n", __func__, id); + return; + } - virtio_dev_cleanup(&dev->dev); + dev = registered_devs[id]; - lkl_host_ops.mem_free(dev->nd); - free_queue_locks(dev->queue_locks, NUM_QUEUES); - lkl_host_ops.mem_free(dev); + ret = lkl_netdev_get_ifindex(id); + if (ret < 0) { + lkl_printf("%s: failed to get ifindex for id %d: %s\n", + __func__, id, lkl_strerror(ret)); + return; + } - return 0; -} + ret = lkl_if_down(ret); + if (ret < 0) { + lkl_printf("%s: failed to put interface id %d down: %s\n", + __func__, id, lkl_strerror(ret)); + return; + } -int lkl_netdevs_remove(void) -{ - int i = 0, failure_count = 0; + dev->nd->ops->close(dev->nd); - for (; i < registered_dev_idx; i++) - failure_count -= lkl_netdev_remove(registered_devs[i]); + lkl_host_ops.thread_join(dev->poll_tid); - if (failure_count) { - lkl_printf("WARN: failed to free %d of %d netdevs.\n", - failure_count, 
registered_dev_idx); - return -1; - } + virtio_dev_cleanup(&dev->dev); - return 0; + lkl_host_ops.mem_free(dev->nd); + free_queue_locks(dev->queue_locks, NUM_QUEUES); + lkl_host_ops.mem_free(dev); + /* clear the slot so a repeated remove sees an invalid id */ + registered_devs[id] = NULL; } diff --git a/tools/lkl/lib/virtio_net_dpdk.c b/tools/lkl/lib/virtio_net_dpdk.c index 10a8884d55111d..1e73c356e6535a 100644 --- a/tools/lkl/lib/virtio_net_dpdk.c +++ b/tools/lkl/lib/virtio_net_dpdk.c @@ -27,6 +27,8 @@ #include #include +#include "virtio.h" + #include static char * const ealargs[] = { @@ -56,6 +58,7 @@ struct lkl_netdev_dpdk { struct rte_mbuf *rms[MAX_PKT_BURST]; int npkts; int bufidx; + int close; }; static int net_tx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) @@ -151,28 +154,35 @@ static int net_rx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) return read; } -static int net_poll(struct lkl_netdev *nd, int events) +static int net_poll(struct lkl_netdev *nd) { - int ret = 0; + struct lkl_netdev_dpdk *nd_dpdk = + container_of(nd, struct lkl_netdev_dpdk, dev); + if (nd_dpdk->close) + return LKL_DEV_NET_POLL_HUP; /* * dpdk's interrupt mode has equivalent of epoll_wait(2), * which we can apply here. but AFAIK the mode is only available * on limited NIC drivers like ixgbe/igb/e1000 (with dpdk v2.2.0), * while vmxnet3 is not supported e.g.. 
*/ - if (events & LKL_DEV_NET_POLL_RX) - ret |= LKL_DEV_NET_POLL_RX; - if (events & LKL_DEV_NET_POLL_TX) - ret |= LKL_DEV_NET_POLL_TX; + return LKL_DEV_NET_POLL_RX | LKL_DEV_NET_POLL_TX; +} + +static void net_close(struct lkl_netdev *nd) +{ + struct lkl_netdev_dpdk *nd_dpdk = + container_of(nd, struct lkl_netdev_dpdk, dev); - return ret; + nd_dpdk->close = 1; } struct lkl_dev_net_ops dpdk_net_ops = { .tx = net_tx, .rx = net_rx, .poll = net_poll, + .close = net_close, }; diff --git a/tools/lkl/lib/virtio_net_fd.c b/tools/lkl/lib/virtio_net_fd.c new file mode 100644 index 00000000000000..37066c8dc79be4 --- /dev/null +++ b/tools/lkl/lib/virtio_net_fd.c @@ -0,0 +1,211 @@ +/* + * POSIX file descriptor based virtual network interface feature for + * LKL Copyright (c) 2015,2016 Ryo Nakamura, Hajime Tazaki + * + * Author: Ryo Nakamura + * Hajime Tazaki + * Octavian Purdila + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "virtio.h" +#include "virtio_net_fd.h" + +struct lkl_netdev_fd { + struct lkl_netdev dev; + /* file-descriptor based device */ + int fd; + /* + * Controls the poll mask for fd. Can be accessed concurrently from + * poll, tx, or rx routines but there is no need for synchronization + * because: + * + * (a) TX and RX routines set different variables so even if they update + * at the same time there is no race condition + * + * (b) Even if poll and TX / RX update at the same time poll cannot + * stall: when poll resets the poll variable we know that TX / RX will + * run which means that eventually the poll variable will be set. + */ + int poll_tx, poll_rx; + /* control pipe */ + int pipe[2]; +}; + +/* The following tx() and rx() code assumes struct lkl_dev_buf matches + * struct iovec so we can safely cast iov to (struct iovec *). 
(If + * BUILD_BUG_ON() were supported in LKL, I would have added + * + * "BUILD_BUG_ON(sizeof(struct lkl_dev_buf) != sizeof(struct iovec));" + */ +static int fd_net_tx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) +{ + int ret; + struct lkl_netdev_fd *nd_fd = + container_of(nd, struct lkl_netdev_fd, dev); + + do { + ret = writev(nd_fd->fd, (struct iovec *)iov, cnt); + } while (ret == -1 && errno == EINTR); + + if (ret < 0) { + if (errno != EAGAIN) { + perror("write to fd netdev fails"); + } else { + /* initialized because its value is written to the pipe */ + char tmp = 0; + + /* ask fd_net_poll() to watch for POLLOUT and wake it up */ + nd_fd->poll_tx = 1; + if (write(nd_fd->pipe[1], &tmp, 1) <= 0) + perror("virtio net fd pipe write"); + } + } + return ret; +} + +static int fd_net_rx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) +{ + int ret; + struct lkl_netdev_fd *nd_fd = + container_of(nd, struct lkl_netdev_fd, dev); + + do { + ret = readv(nd_fd->fd, (struct iovec *)iov, cnt); + } while (ret == -1 && errno == EINTR); + + if (ret < 0) { + if (errno != EAGAIN) { + perror("virtio net fd read"); + } else { + /* initialized because its value is written to the pipe */ + char tmp = 0; + + /* ask fd_net_poll() to watch for POLLIN and wake it up */ + nd_fd->poll_rx = 1; + if (write(nd_fd->pipe[1], &tmp, 1) < 0) + perror("virtio net fd pipe write"); + } + } + return ret; +} + +/* + * Waits until the device fd is ready for the directions requested through + * poll_rx / poll_tx, or until the control pipe is written to or closed. + * Returns a mask of LKL_DEV_NET_POLL_RX / _TX / _HUP; 0 is returned for a + * bare wakeup or a poll() failure. + */ +static int fd_net_poll(struct lkl_netdev *nd) +{ + struct lkl_netdev_fd *nd_fd = + container_of(nd, struct lkl_netdev_fd, dev); + struct pollfd pfds[2] = { + { + .fd = nd_fd->fd, + }, + { + .fd = nd_fd->pipe[0], + .events = POLLIN, + }, + }; + int ret; + + if (nd_fd->poll_rx) + pfds[0].events |= POLLIN|POLLPRI; + if (nd_fd->poll_tx) + pfds[0].events |= POLLOUT; + + do { + ret = poll(pfds, 2, -1); + } while (ret == -1 && errno == EINTR); + + if (ret < 0) { + perror("virtio net fd poll"); + return 0; + } + + if (pfds[1].revents & (POLLHUP|POLLNVAL)) + return LKL_DEV_NET_POLL_HUP; + + if (pfds[1].revents & POLLIN) { + char tmp[PIPE_BUF]; + + ret = read(nd_fd->pipe[0], tmp, PIPE_BUF); + if (ret == 0) + return LKL_DEV_NET_POLL_HUP; + if (ret < 0) + perror("virtio net fd pipe read"); + } + + ret = 0; + + if (pfds[0].revents & 
(POLLIN|POLLPRI)) { + nd_fd->poll_rx = 0; + ret |= LKL_DEV_NET_POLL_RX; + } + + if (pfds[0].revents & POLLOUT) { + nd_fd->poll_tx = 0; + ret |= LKL_DEV_NET_POLL_TX; + } + + return ret; +} + +static void fd_net_close(struct lkl_netdev *nd) +{ + struct lkl_netdev_fd *nd_fd = + container_of(nd, struct lkl_netdev_fd, dev); + + /* + * Closing the write end causes a POLLHUP in the poll function. + * NOTE(review): pipe[0] and fd are closed while the poll thread may + * still be blocked in poll(); confirm this cannot race with it. + */ + close(nd_fd->pipe[1]); + close(nd_fd->pipe[0]); + close(nd_fd->fd); +} + +struct lkl_dev_net_ops fd_net_ops = { + .tx = fd_net_tx, + .rx = fd_net_rx, + .poll = fd_net_poll, + .close = fd_net_close, +}; + +/* + * Wraps @fd in a newly allocated fd based lkl_netdev. Returns NULL on + * failure, in which case @fd is left open and stays owned by the caller. + */ +struct lkl_netdev *lkl_register_netdev_fd(int fd) +{ + struct lkl_netdev_fd *nd; + + nd = malloc(sizeof(*nd)); + if (!nd) { + fprintf(stderr, "fdnet: failed to allocate memory\n"); + /* TODO: propagate the error state, maybe use errno for that? */ + return NULL; + } + + memset(nd, 0, sizeof(*nd)); + + nd->fd = fd; + if (pipe(nd->pipe) < 0) { + perror("pipe"); + /* no pipe was created: only nd itself has to be released */ + free(nd); + return NULL; + } + + if (fcntl(nd->pipe[0], F_SETFL, O_NONBLOCK) < 0) { + perror("fcntl"); + close(nd->pipe[0]); + close(nd->pipe[1]); + free(nd); + /* nd is gone, it must not be initialized or returned */ + return NULL; + } + + nd->dev.ops = &fd_net_ops; + return &nd->dev; +} + +void lkl_unregister_netdev_fd(struct lkl_netdev *nd) +{ + struct lkl_netdev_fd *nd_fd = + container_of(nd, struct lkl_netdev_fd, dev); + + fd_net_close(nd); + free(nd_fd); +} diff --git a/tools/lkl/lib/virtio_net_fd.h b/tools/lkl/lib/virtio_net_fd.h new file mode 100644 index 00000000000000..a4105e5563ecf9 --- /dev/null +++ b/tools/lkl/lib/virtio_net_fd.h @@ -0,0 +1,34 @@ +#ifndef _VIRTIO_NET_FD_H +#define _VIRTIO_NET_FD_H + +struct ifreq; + +/** + * lkl_register_netdev_fd - register a file descriptor-based network + * device as a NIC + * + * @fd - a POSIX file descriptor number for input/output + * @returns a struct lkl_netdev entry for virtio-net, or NULL on failure + */ +struct lkl_netdev *lkl_register_netdev_fd(int fd); + + +/** + * 
lkl_unregister_netdev_fd - unregister a file descriptor-based + * network device as a NIC + * + * @nd - a struct lkl_netdev entry to be unregistered + */ +void lkl_unregister_netdev_fd(struct lkl_netdev *nd); + +/** + * lkl_netdev_tap_init - initialize tap related structure for lkl_netdev. + * + * @path - the path to open the device. + * @offload - offload bits for the device + * @ifr - struct ifreq for ioctl. + */ +struct lkl_netdev *lkl_netdev_tap_init(const char *path, int offload, + struct ifreq *ifr); + +#endif /* _VIRTIO_NET_FD_H*/ diff --git a/tools/lkl/lib/virtio_net_linux_fdnet.c b/tools/lkl/lib/virtio_net_linux_fdnet.c deleted file mode 100644 index 74f60b8b168f14..00000000000000 --- a/tools/lkl/lib/virtio_net_linux_fdnet.c +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Linux File descripter based virtual network interface feature for LKL - * Copyright (c) 2015,2016 Ryo Nakamura, Hajime Tazaki - * - * Author: Ryo Nakamura - * Hajime Tazaki - * Octavian Purdila - * - * Current implementation is linux-specific. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "virtio.h" -#include "virtio_net_linux_fdnet.h" - -struct lkl_netdev_linux_fdnet_ops lkl_netdev_linux_fdnet_ops = { - /* - * /dev/net/tun is Linux specific so we know our host is some - * flavor of Linux, but this allows graceful support if we're - * on a kernel that's < 2.6.22. - */ - #ifdef __NR_eventfd - /* This sigature was recently (9/2014) changed in glibc. */ - .eventfd = (int (*)(unsigned int, int))eventfd, - #endif /* __NR_eventfd */ -}; - -/* The following tx() and rx() code assume struct lkl_dev_buf matches - * sruct iovec so we can safely cast iov to (struct iovec *). 
(If - * BUILD_BUG_ON() were supported in LKL, I would have added - * - * "BUILD_BUG_ON(sizeof(struct lkl_dev_buf) == sizeof(struct iovec));" - */ -static int linux_fdnet_net_tx(struct lkl_netdev *nd, - struct lkl_dev_buf *iov, int cnt) -{ - int ret; - struct lkl_netdev_linux_fdnet *nd_fdnet = - container_of(nd, struct lkl_netdev_linux_fdnet, dev); - - do { - ret = writev(nd_fdnet->fd, (struct iovec *)iov, cnt); - } while (ret == -1 && errno == EINTR); - - if (ret < 0 && errno != EAGAIN) - perror("write to Linux fd netdev fails"); - return ret; -} - -static int linux_fdnet_net_rx(struct lkl_netdev *nd, - struct lkl_dev_buf *iov, int cnt) -{ - int ret; - struct lkl_netdev_linux_fdnet *nd_fdnet = - container_of(nd, struct lkl_netdev_linux_fdnet, dev); - - do { - ret = readv(nd_fdnet->fd, (struct iovec *)iov, cnt); - } while (ret == -1 && errno == EINTR); - - if (ret < 0 && errno != EAGAIN) - perror("read from fdnet device fails"); - return ret; -} - -static int linux_fdnet_net_poll(struct lkl_netdev *nd, int events) -{ - struct lkl_netdev_linux_fdnet *nd_fdnet = - container_of(nd, struct lkl_netdev_linux_fdnet, dev); - int epoll_fd = -1; - struct epoll_event ev[2]; - int ret; - const int is_rx = events & LKL_DEV_NET_POLL_RX; - const int is_tx = events & LKL_DEV_NET_POLL_TX; - int i; - int ret_ev = 0; - unsigned int event; - - if (is_rx && is_tx) { - fprintf(stderr, "both LKL_DEV_NET_POLL_RX and " - "LKL_DEV_NET_POLL_TX are set\n"); - lkl_host_ops.panic(); - return -1; - } - if (!is_rx && !is_tx) { - fprintf(stderr, "Neither LKL_DEV_NET_POLL_RX nor" - " LKL_DEV_NET_POLL_TX are set.\n"); - lkl_host_ops.panic(); - return -1; - } - - if (is_rx) - epoll_fd = nd_fdnet->epoll_rx_fd; - else if (is_tx) - epoll_fd = nd_fdnet->epoll_tx_fd; - - do { - ret = epoll_wait(epoll_fd, ev, 2, -1); - } while (ret == -1 && errno == EINTR); - if (ret < 0) { - perror("epoll_wait"); - return -1; - } - - for (i = 0; i < ret; ++i) { - if (ev[i].data.fd == nd_fdnet->eventfd) - return -1; - if 
(ev[i].data.fd == nd_fdnet->fd) { - event = ev[i].events; - if (event & (EPOLLIN | EPOLLPRI)) - ret_ev = LKL_DEV_NET_POLL_RX; - else if (event & EPOLLOUT) - ret_ev = LKL_DEV_NET_POLL_TX; - else - return -1; - } - } - return ret_ev; -} - -static int linux_fdnet_net_close(struct lkl_netdev *nd) -{ - long buf = 1; - struct lkl_netdev_linux_fdnet *nd_fdnet = - container_of(nd, struct lkl_netdev_linux_fdnet, dev); - - if (nd_fdnet->eventfd == -1) { - /* No eventfd support. */ - return 0; - } - - if (write(nd_fdnet->eventfd, &buf, sizeof(buf)) < 0) { - perror("linux-fdnet: failed to close fd"); - /* This should never happen. */ - return -1; - } - - /* The order that we join in doesn't matter. */ - if (lkl_host_ops.thread_join(nd->rx_tid) || - lkl_host_ops.thread_join(nd->tx_tid)) - return -1; - - /* nor does the order that we close */ - if (close(nd_fdnet->fd) || close(nd_fdnet->eventfd) || - close(nd_fdnet->epoll_rx_fd) || close(nd_fdnet->epoll_tx_fd)) { - perror("linux-fdnet net_close fd"); - return -1; - } - - return 0; -} - -struct lkl_dev_net_ops linux_fdnet_net_ops = { - .tx = linux_fdnet_net_tx, - .rx = linux_fdnet_net_rx, - .poll = linux_fdnet_net_poll, - .close = linux_fdnet_net_close, -}; - -static int add_to_epoll(int epoll_fd, int fd, unsigned int events) -{ - struct epoll_event ev; - int ret; - - memset(&ev, 0, sizeof(ev)); - ev.events = events; - ev.data.fd = fd; - ret = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &ev); - if (ret) { - perror("EPOLL_CTL_ADD fails"); - return -1; - } - return 0; -} - -static int create_epoll_fd(int fd, unsigned int events) -{ - int ret = epoll_create1(0); - - if (ret < 0) { - perror("epoll_create1"); - return -1; - } - if (add_to_epoll(ret, fd, events)) { - close(ret); - return -1; - } - return ret; -} - - -struct lkl_netdev_linux_fdnet *lkl_register_netdev_linux_fdnet(int fd) -{ - struct lkl_netdev_linux_fdnet *nd; - - nd = (struct lkl_netdev_linux_fdnet *) - malloc(sizeof(struct lkl_netdev_linux_fdnet)); - if (!nd) { - 
fprintf(stderr, "fdnet: failed to allocate memory\n"); - /* TODO: propagate the error state, maybe use errno for that? */ - return NULL; - } - - memset(nd, 0, sizeof(struct lkl_netdev_linux_fdnet)); - - nd->fd = fd; - /* Making them edge-triggered to save CPU. */ - nd->epoll_rx_fd = create_epoll_fd(nd->fd, EPOLLIN | EPOLLPRI | EPOLLET); - nd->epoll_tx_fd = create_epoll_fd(nd->fd, EPOLLOUT | EPOLLET); - if (nd->epoll_rx_fd < 0 || nd->epoll_tx_fd < 0) { - if (nd->epoll_rx_fd >= 0) - close(nd->epoll_rx_fd); - if (nd->epoll_tx_fd >= 0) - close(nd->epoll_tx_fd); - lkl_unregister_netdev_linux_fdnet(nd); - return NULL; - } - - if (lkl_netdev_linux_fdnet_ops.eventfd) { - /* eventfd is supported by the host, all is well */ - nd->eventfd = lkl_netdev_linux_fdnet_ops.eventfd( - 0, EFD_NONBLOCK | EFD_SEMAPHORE); - - if (nd->eventfd < 0) { - perror("fdnet: create eventfd"); - lkl_unregister_netdev_linux_fdnet(nd); - return NULL; - } - if (add_to_epoll(nd->epoll_rx_fd, nd->eventfd, EPOLLIN) || - add_to_epoll(nd->epoll_tx_fd, nd->eventfd, EPOLLIN)) { - lkl_unregister_netdev_linux_fdnet(nd); - return NULL; - } - } else { - /* no host eventfd support */ - nd->eventfd = -1; - } - - nd->dev.ops = &linux_fdnet_net_ops; - return nd; -} - -void lkl_unregister_netdev_linux_fdnet(struct lkl_netdev_linux_fdnet *nd) -{ - close(nd->eventfd); - close(nd->epoll_rx_fd); - close(nd->epoll_tx_fd); - free(nd); -} diff --git a/tools/lkl/lib/virtio_net_linux_fdnet.h b/tools/lkl/lib/virtio_net_linux_fdnet.h deleted file mode 100644 index 73684ae34c0395..00000000000000 --- a/tools/lkl/lib/virtio_net_linux_fdnet.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef _VIRTIO_NET_LINUX_FDNET_H -#define _VIRTIO_NET_LINUX_FDNET_H - -struct ifreq; - -struct lkl_netdev_linux_fdnet { - struct lkl_netdev dev; - /* file-descriptor based device */ - int fd; - /* Needed to initiate shutdown */ - int eventfd; - /* epoll fds for rx and tx */ - int epoll_rx_fd; - int epoll_tx_fd; -}; - -extern struct lkl_netdev_linux_fdnet_ops { 
- /* - * We need this so that we can "unhijack" this function in - * case we decided to hijack it. - */ - int (*eventfd)(unsigned int initval, int flags); -} lkl_netdev_linux_fdnet_ops; - -/** - * lkl_register_netdev_linux_fdnet - register a file descriptor-based network - * device as a NIC - * - * @fd - a POSIX file descriptor number for input/output - * @returns a struct lkl_netdev_linux_fdnet entry for virtio-net - */ -struct lkl_netdev_linux_fdnet *lkl_register_netdev_linux_fdnet(int fd); - - -/** - * lkl_unregister_netdev_linux_fdnet - unregister a file descriptor-based - * network device as a NIC - * - * @nd - a struct lkl_netdev_linux_fdnet entry to be unregistered - */ -void lkl_unregister_netdev_linux_fdnet(struct lkl_netdev_linux_fdnet *nd); - -/** - * lkl_netdev_tap_init - initialize tap related structure fot lkl_netdev. - * - * @path - the path to open the device. - * @offload - offload bits for the device - * @ifr - struct ifreq for ioctl. - */ -struct lkl_netdev_linux_fdnet *lkl_netdev_tap_init(const char *path, - int offload, - struct ifreq *ifr); - -#endif /* _VIRTIO_NET_LINUX_FDNET_H*/ diff --git a/tools/lkl/lib/virtio_net_macvtap.c b/tools/lkl/lib/virtio_net_macvtap.c index 644d391da90e33..1e97b741f8d78b 100644 --- a/tools/lkl/lib/virtio_net_macvtap.c +++ b/tools/lkl/lib/virtio_net_macvtap.c @@ -19,7 +19,7 @@ #include #include "virtio.h" -#include "virtio_net_linux_fdnet.h" +#include "virtio_net_fd.h" struct lkl_netdev *lkl_netdev_macvtap_create(const char *path, int offload) { @@ -27,5 +27,5 @@ struct lkl_netdev *lkl_netdev_macvtap_create(const char *path, int offload) .ifr_flags = IFF_TAP | IFF_NO_PI, }; - return (struct lkl_netdev *)lkl_netdev_tap_init(path, offload, &ifr); + return lkl_netdev_tap_init(path, offload, &ifr); } diff --git a/tools/lkl/lib/virtio_net_raw.c b/tools/lkl/lib/virtio_net_raw.c index ac6edd42e71526..1161e945f06720 100644 --- a/tools/lkl/lib/virtio_net_raw.c +++ b/tools/lkl/lib/virtio_net_raw.c @@ -19,7 +19,7 @@ #include 
#include "virtio.h" -#include "virtio_net_linux_fdnet.h" +#include "virtio_net_fd.h" /* since Linux 3.14 (man 7 packet) */ #ifndef PACKET_QDISC_BYPASS @@ -28,7 +28,6 @@ struct lkl_netdev *lkl_netdev_raw_create(const char *ifname) { - struct lkl_netdev_linux_fdnet *nd; int ret; struct sockaddr_ll ll; int fd, fd_flags, val; @@ -59,11 +58,5 @@ struct lkl_netdev *lkl_netdev_raw_create(const char *ifname) fd_flags = fcntl(fd, F_GETFD, NULL); fcntl(fd, F_SETFL, fd_flags | O_NONBLOCK); - nd = lkl_register_netdev_linux_fdnet(fd); - if (!nd) { - perror("failed to register to."); - return NULL; - } - - return (struct lkl_netdev *)nd; + return lkl_register_netdev_fd(fd); } diff --git a/tools/lkl/lib/virtio_net_tap.c b/tools/lkl/lib/virtio_net_tap.c index 22b83a6ee02fb2..60bf1fd4fc5f69 100644 --- a/tools/lkl/lib/virtio_net_tap.c +++ b/tools/lkl/lib/virtio_net_tap.c @@ -19,15 +19,14 @@ #include #include "virtio.h" -#include "virtio_net_linux_fdnet.h" +#include "virtio_net_fd.h" #define BIT(x) (1ULL << x) -struct lkl_netdev_linux_fdnet *lkl_netdev_tap_init(const char *path, - int offload, - struct ifreq *ifr) +struct lkl_netdev *lkl_netdev_tap_init(const char *path, int offload, + struct ifreq *ifr) { - struct lkl_netdev_linux_fdnet *nd; + struct lkl_netdev *nd; int fd, ret, tap_arg = 0, vnet_hdr_sz = 0; if (offload & BIT(LKL_VIRTIO_NET_F_GUEST_CSUM)) @@ -68,14 +67,14 @@ struct lkl_netdev_linux_fdnet *lkl_netdev_tap_init(const char *path, close(fd); return NULL; } - nd = lkl_register_netdev_linux_fdnet(fd); + nd = lkl_register_netdev_fd(fd); if (!nd) { perror("failed to register to."); close(fd); return NULL; } - nd->dev.has_vnet_hdr = (vnet_hdr_sz != 0); + nd->has_vnet_hdr = (vnet_hdr_sz != 0); return nd; } @@ -88,5 +87,5 @@ struct lkl_netdev *lkl_netdev_tap_create(const char *ifname, int offload) strncpy(ifr.ifr_name, ifname, IFNAMSIZ); - return (struct lkl_netdev *)lkl_netdev_tap_init(path, offload, &ifr); + return lkl_netdev_tap_init(path, offload, &ifr); } diff --git 
a/tools/lkl/lib/virtio_net_vde.c b/tools/lkl/lib/virtio_net_vde.c index 20535439fae309..f41ad1d33c1992 100644 --- a/tools/lkl/lib/virtio_net_vde.c +++ b/tools/lkl/lib/virtio_net_vde.c @@ -8,35 +8,37 @@ #include #include +#include "virtio.h" + #include struct lkl_netdev_vde { - struct lkl_dev_net_ops *ops; + struct lkl_netdev dev; VDECONN *conn; }; struct lkl_netdev *nuse_vif_vde_create(char *switch_path); static int net_vde_tx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt); static int net_vde_rx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt); -static int net_vde_poll_with_timeout(struct lkl_netdev *nd, int events, - int timeout); -static int net_vde_poll(struct lkl_netdev *nd, int events); +static int net_vde_poll_with_timeout(struct lkl_netdev *nd, int timeout); +static int net_vde_poll(struct lkl_netdev *nd); +static int net_vde_close(struct lkl_netdev *nd); struct lkl_dev_net_ops vde_net_ops = { .tx = net_vde_tx, .rx = net_vde_rx, .poll = net_vde_poll, + .close = net_vde_close, }; int net_vde_tx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) { int ret; - struct lkl_netdev_vde *nd_vde; + struct lkl_netdev_vde *nd_vde = + container_of(nd, struct lkl_netdev_vde, dev); void *data = iov[0].addr; int len = (int)iov[0].len; - nd_vde = (struct lkl_netdev_vde *) nd; - ret = vde_send(nd_vde->conn, data, len, 0); if (ret <= 0 && errno == EAGAIN) return -1; @@ -46,12 +48,11 @@ int net_vde_tx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) int net_vde_rx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) { int ret; - struct lkl_netdev_vde *nd_vde; + struct lkl_netdev_vde *nd_vde = + container_of(nd, struct lkl_netdev_vde, dev); void *data = iov[0].addr; int len = (int)iov[0].len; - nd_vde = (struct lkl_netdev_vde *) nd; - /* * Due to a bug in libvdeplug we have to first poll to make sure * that there is data available. 
@@ -60,24 +61,22 @@ int net_vde_rx(struct lkl_netdev *nd, struct lkl_dev_buf *iov, int cnt) * This should be changed once libvdeplug is fixed. */ ret = 0; - if (net_vde_poll_with_timeout(nd, LKL_DEV_NET_POLL_RX, 0) & - LKL_DEV_NET_POLL_RX) + if (net_vde_poll_with_timeout(nd, 0) & LKL_DEV_NET_POLL_RX) ret = vde_recv(nd_vde->conn, data, len, 0); if (ret <= 0) return -1; return ret; } -int net_vde_poll_with_timeout(struct lkl_netdev *nd, int events, int timeout) +int net_vde_poll_with_timeout(struct lkl_netdev *nd, int timeout) { int ret; - struct lkl_netdev_vde *nd_vde; - - nd_vde = (struct lkl_netdev_vde *) nd; - + struct lkl_netdev_vde *nd_vde = + container_of(nd, struct lkl_netdev_vde, dev); struct pollfd pollfds[] = { { .fd = vde_datafd(nd_vde->conn), + .events = POLLIN | POLLOUT, }, { .fd = vde_ctlfd(nd_vde->conn), @@ -85,20 +84,15 @@ int net_vde_poll_with_timeout(struct lkl_netdev *nd, int events, int timeout) } }; - if (events & LKL_DEV_NET_POLL_RX) - pollfds[0].events |= POLLIN; - if (events & LKL_DEV_NET_POLL_TX) - pollfds[0].events |= POLLOUT; - while (poll(pollfds, 2, timeout) < 0 && errno == EINTR) ; ret = 0; if (pollfds[1].revents & (POLLHUP | POLLNVAL | POLLIN)) - return -1; + return LKL_DEV_NET_POLL_HUP; if (pollfds[0].revents & (POLLHUP | POLLNVAL)) - return -1; + return LKL_DEV_NET_POLL_HUP; if (pollfds[0].revents & POLLIN) ret |= LKL_DEV_NET_POLL_RX; @@ -108,9 +102,17 @@ int net_vde_poll_with_timeout(struct lkl_netdev *nd, int events, int timeout) return ret; } -int net_vde_poll(struct lkl_netdev *nd, int events) +int net_vde_poll(struct lkl_netdev *nd) +{ + return net_vde_poll_with_timeout(nd, -1); +} + +void net_vde_close(struct lkl_netdev *nd) { - return net_vde_poll_with_timeout(nd, events, -1); + struct lkl_netdev_vde *nd_vde = + container_of(nd, struct lkl_netdev_vde, dev); + + vde_close(nd_vde->conn); } struct lkl_netdev *lkl_netdev_vde_create(char const *switch_path) @@ -119,13 +121,13 @@ struct lkl_netdev *lkl_netdev_vde_create(char const 
*switch_path) struct vde_open_args open_args = {.port = 0, .group = 0, .mode = 0700 }; char *switch_path_copy = 0; - nd = (struct lkl_netdev_vde *)malloc(sizeof(*nd)); + nd = malloc(sizeof(*nd)); if (!nd) { fprintf(stderr, "Failed to allocate memory.\n"); /* TODO: propagate the error state, maybe use errno? */ return 0; } - nd->ops = &vde_net_ops; + nd->dev.ops = &vde_net_ops; /* vde_open() allows the null pointer as path which means * "VDE default path" @@ -153,7 +155,7 @@ struct lkl_netdev *lkl_netdev_vde_create(char const *switch_path) return 0; } - return (struct lkl_netdev *)nd; + return &nd->dev; } #else /* CONFIG_AUTO_LKL_VIRTIO_NET_VDE */