Skip to content

Commit

Permalink
conn, device, tun: implement vectorized I/O on Linux
Browse files Browse the repository at this point in the history
This commit implements TCP offloading via TSO and GRO for the Linux
tun.Device, which is made possible by virtio extensions in the Kernel's
TUN driver.

conn.LinuxSocketEndpoint has been deleted in favor of a collapsed
conn.StdNetBind. conn.StdNetBind makes use of recvmmsg() and sendmmsg()
on Linux. All platforms now fall under conn.StdNetBind, except for
Windows, which remains in conn.WinRingBind.

Sticky sockets support has been refactored as part of this work so that it
can eventually be applied on platforms other than Linux; however, Linux
remains the sole platform that fully implements it.

Signed-off-by: Jordan Whited <[email protected]>
Signed-off-by: James Tucker <[email protected]>
Co-authored-by: James Tucker <[email protected]>
  • Loading branch information
jwhited and raggi committed Mar 2, 2023
1 parent d115be4 commit e6bfbdf
Show file tree
Hide file tree
Showing 24 changed files with 1,870 additions and 787 deletions.
587 changes: 0 additions & 587 deletions conn/bind_linux.go

This file was deleted.

339 changes: 239 additions & 100 deletions conn/bind_std.go

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions conn/boundif_android.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

package conn

func (bind *StdNetBind) PeekLookAtSocketFd4() (fd int, err error) {
sysconn, err := bind.ipv4.SyscallConn()
func (s *StdNetBind) PeekLookAtSocketFd4() (fd int, err error) {
sysconn, err := s.ipv4.SyscallConn()
if err != nil {
return -1, err
}
Expand All @@ -19,8 +19,8 @@ func (bind *StdNetBind) PeekLookAtSocketFd4() (fd int, err error) {
return
}

func (bind *StdNetBind) PeekLookAtSocketFd6() (fd int, err error) {
sysconn, err := bind.ipv6.SyscallConn()
func (s *StdNetBind) PeekLookAtSocketFd6() (fd int, err error) {
sysconn, err := s.ipv6.SyscallConn()
if err != nil {
return -1, err
}
Expand Down
2 changes: 1 addition & 1 deletion conn/conn.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import (
)

const (
DefaultBatchSize = 1 // maximum number of packets handled per read and write
DefaultBatchSize = 128 // maximum number of packets handled per read and write
)

// A ReceiveFunc receives at least one packet from the network and writes them
Expand Down
36 changes: 36 additions & 0 deletions conn/controlfns.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/* SPDX-License-Identifier: MIT
*
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
*/

package conn

import (
"net"
"syscall"
)

// controlFn is the callback function signature from net.ListenConfig.Control.
// It receives the network and address being listened on together with the raw
// connection, and is used to apply platform specific configuration to the
// socket prior to bind. A non-nil error reports that the configuration could
// not be applied.
type controlFn func(network, address string, c syscall.RawConn) error

// controlFns is the list of functions that the listen config's Control
// callback invokes, in slice order, to apply socket options. It starts out
// empty; platform specific files append their functions to it from init.
var controlFns = []controlFn{}

// listenConfig builds a net.ListenConfig whose Control hook runs every entry
// of controlFns, in order, against the socket before it is bound. The first
// entry that fails stops the chain and its error is returned from the hook.
// The registered functions carry the socket buffer sizing and packet
// information OOB configuration needed for sticky sockets.
func listenConfig() *net.ListenConfig {
	runAll := func(network, address string, c syscall.RawConn) error {
		for _, apply := range controlFns {
			err := apply(network, address, c)
			if err != nil {
				return err
			}
		}
		return nil
	}

	lc := new(net.ListenConfig)
	lc.Control = runAll
	return lc
}
41 changes: 41 additions & 0 deletions conn/controlfns_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/* SPDX-License-Identifier: MIT
*
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
*/

package conn

import (
"fmt"
"syscall"

"golang.org/x/sys/unix"
)

// init registers the Linux specific socket configuration with controlFns.
func init() {
	controlFns = append(controlFns,

		// Enable receiving of the packet information (IP_PKTINFO for IPv4,
		// IPV6_PKTINFO for IPv6) that is used to implement sticky socket support.
		// IPv6 sockets are additionally marked IPV6_V6ONLY. Any network other
		// than "udp4" or "udp6" is rejected with an error wrapping EINVAL.
		func(network, address string, c syscall.RawConn) error {
			var (
				sockErr error // result of the setsockopt calls made inside Control
				ctrlErr error // result of RawConn.Control itself
			)
			switch network {
			case "udp4":
				ctrlErr = c.Control(func(fd uintptr) {
					sockErr = unix.SetsockoptInt(int(fd), unix.IPPROTO_IP, unix.IP_PKTINFO, 1)
				})
			case "udp6":
				ctrlErr = c.Control(func(fd uintptr) {
					sockErr = unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_RECVPKTINFO, 1)
					if sockErr != nil {
						return
					}
					sockErr = unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_V6ONLY, 1)
				})
			default:
				return fmt.Errorf("unhandled network: %s: %w", network, unix.EINVAL)
			}
			// If Control itself failed the closure never ran, so the socket
			// options were not applied; report that instead of returning nil.
			if ctrlErr != nil {
				return ctrlErr
			}
			return sockErr
		},
	)
}
28 changes: 28 additions & 0 deletions conn/controlfns_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
//go:build !windows && !linux && !js

/* SPDX-License-Identifier: MIT
*
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
*/

package conn

import (
"syscall"

"golang.org/x/sys/unix"
)

// init registers the socket configuration used on non-Linux, non-Windows,
// non-JS platforms with controlFns.
func init() {
	controlFns = append(controlFns,

		// Mark "udp6" sockets IPV6_V6ONLY. Every other network is left
		// untouched.
		func(network, address string, c syscall.RawConn) error {
			if network != "udp6" {
				return nil
			}

			var sockErr error
			ctrlErr := c.Control(func(fd uintptr) {
				sockErr = unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_V6ONLY, 1)
			})
			// If Control itself failed the closure never ran, so the option
			// was not applied; report that instead of returning nil.
			if ctrlErr != nil {
				return ctrlErr
			}
			return sockErr
		},
	)
}
2 changes: 1 addition & 1 deletion conn/default.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//go:build !linux && !windows
//go:build !windows

/* SPDX-License-Identifier: MIT
*
Expand Down
2 changes: 1 addition & 1 deletion conn/mark_default.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@

package conn

func (bind *StdNetBind) SetMark(mark uint32) error {
func (s *StdNetBind) SetMark(mark uint32) error {
return nil
}
10 changes: 5 additions & 5 deletions conn/mark_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ func init() {
}
}

func (bind *StdNetBind) SetMark(mark uint32) error {
func (s *StdNetBind) SetMark(mark uint32) error {
var operr error
if fwmarkIoctl == 0 {
return nil
}
if bind.ipv4 != nil {
fd, err := bind.ipv4.SyscallConn()
if s.ipv4 != nil {
fd, err := s.ipv4.SyscallConn()
if err != nil {
return err
}
Expand All @@ -46,8 +46,8 @@ func (bind *StdNetBind) SetMark(mark uint32) error {
return err
}
}
if bind.ipv6 != nil {
fd, err := bind.ipv6.SyscallConn()
if s.ipv6 != nil {
fd, err := s.ipv6.SyscallConn()
if err != nil {
return err
}
Expand Down
26 changes: 26 additions & 0 deletions conn/sticky_default.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
//go:build !linux
// +build !linux

/* SPDX-License-Identifier: MIT
*
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
*/

package conn

// TODO: macOS, FreeBSD and other BSDs likely do support this feature set, but
// use alternatively named flags and need ports and require testing.

// getSrcFromControl is a no-op stub on non-Linux platforms: control is not
// parsed and ep is left unchanged. The Linux implementation parses control for
// PKTINFO and updates ep with the source information found.
func getSrcFromControl(control []byte, ep *StdNetEndpoint) {
}

// setSrcControl is a no-op stub on non-Linux platforms: neither control nor ep
// is read or modified. The Linux implementation writes a PKTINFO control
// message built from ep's source information into control.
func setSrcControl(control *[]byte, ep *StdNetEndpoint) {
}

// srcControlSize is the recommended buffer size for pooling sticky control
// data. It is 0 here because the sticky socket functions in this file are
// no-ops.
const srcControlSize = 0
111 changes: 111 additions & 0 deletions conn/sticky_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/* SPDX-License-Identifier: MIT
*
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
*/

package conn

import (
"net/netip"
"unsafe"

"golang.org/x/sys/unix"
)

// getSrcFromControl parses the control for PKTINFO and if found updates ep with
// the source information found.
//
// ep.ClearSrc is called first, before any parsing. control is then walked one
// socket control message at a time; the first IP_PKTINFO or IPV6_PKTINFO
// message found populates ep.src (address and interface index) and ends the
// walk. A parse error also ends the walk, leaving ep as ClearSrc left it.
func getSrcFromControl(control []byte, ep *StdNetEndpoint) {
	ep.ClearSrc()

	var (
		hdr  unix.Cmsghdr
		data []byte
		rem  = control
		err  error
	)

	for len(rem) > unix.SizeofCmsghdr {
		// Parse from rem, not control: each call must consume the next
		// message. Re-parsing control would yield the first message forever
		// and never advance past it.
		hdr, data, rem, err = unix.ParseOneSocketControlMessage(rem)
		if err != nil {
			return
		}

		switch {
		case hdr.Level == unix.IPPROTO_IP && hdr.Type == unix.IP_PKTINFO:
			info := pktInfoFromBuf[unix.Inet4Pktinfo](data)
			ep.src.Addr = netip.AddrFrom4(info.Spec_dst)
			ep.src.ifidx = info.Ifindex
			return

		case hdr.Level == unix.IPPROTO_IPV6 && hdr.Type == unix.IPV6_PKTINFO:
			info := pktInfoFromBuf[unix.Inet6Pktinfo](data)
			ep.src.Addr = netip.AddrFrom16(info.Addr)
			ep.src.ifidx = int32(info.Ifindex)
			return
		}
	}
}

// pktInfoFromBuf decodes a T from the leading bytes of buf by copying them
// over the in-memory representation of the result. It panics when buf holds
// fewer bytes than a T occupies.
func pktInfoFromBuf[T unix.Inet4Pktinfo | unix.Inet6Pktinfo](buf []byte) (t T) {
	need := int(unsafe.Sizeof(t))
	if need > len(buf) {
		panic("pktInfoFromBuf: buffer too small")
	}
	// View t as a byte slice so the raw bytes can be copied straight in.
	dst := unsafe.Slice((*byte)(unsafe.Pointer(&t)), need)
	copy(dst, buf)
	return t
}

// setSrcControl writes a PKTINFO control message into control, built from the
// source address and interface index held in ep: IP_PKTINFO when ep's source
// IP is IPv4, IPV6_PKTINFO otherwise.
//
// control is first extended to its full capacity. On return its length is the
// length of the message written, or 0 when nothing was written because
// control's capacity is smaller than a Cmsghdr or than srcControlSize, or
// because ep has neither a source interface index nor a valid source IP.
func setSrcControl(control *[]byte, ep *StdNetEndpoint) {
	*control = (*control)[:cap(*control)]
	if len(*control) < int(unsafe.Sizeof(unix.Cmsghdr{})) {
		*control = (*control)[:0]
		return
	}

	if ep.src.ifidx == 0 && !ep.SrcIP().IsValid() {
		*control = (*control)[:0]
		return
	}

	if len(*control) < srcControlSize {
		*control = (*control)[:0]
		return
	}

	// The header and payload are written in place over the start of control.
	hdr := (*unix.Cmsghdr)(unsafe.Pointer(&(*control)[0]))
	if ep.SrcIP().Is4() {
		hdr.Level = unix.IPPROTO_IP
		hdr.Type = unix.IP_PKTINFO
		hdr.SetLen(unix.CmsgLen(unix.SizeofInet4Pktinfo))

		// Assign the whole struct so no bytes from a previous use of the
		// buffer survive in the payload. Is4 implies the address is valid.
		info := (*unix.Inet4Pktinfo)(unsafe.Pointer(&(*control)[unix.SizeofCmsghdr]))
		*info = unix.Inet4Pktinfo{
			Ifindex:  ep.src.ifidx,
			Spec_dst: ep.SrcIP().As4(),
		}
	} else {
		hdr.Level = unix.IPPROTO_IPV6
		hdr.Type = unix.IPV6_PKTINFO
		// Same length computation as the IPv4 branch; SetLen hides the
		// architecture dependent width of the Len field.
		hdr.SetLen(unix.CmsgLen(unix.SizeofInet6Pktinfo))

		// Assign the whole struct so that, when ep only carries an interface
		// index, the address is zero rather than stale buffer contents.
		info := (*unix.Inet6Pktinfo)(unsafe.Pointer(&(*control)[unix.SizeofCmsghdr]))
		*info = unix.Inet6Pktinfo{Ifindex: uint32(ep.src.ifidx)}
		if ep.SrcIP().IsValid() {
			info.Addr = ep.SrcIP().As16()
		}
	}

	*control = (*control)[:hdr.Len]
}

// srcControlSize is the length of a single control message carrying an
// Inet6Pktinfo. setSrcControl writes nothing into a control buffer whose
// capacity is smaller than this.
var srcControlSize = unix.CmsgLen(unix.SizeofInet6Pktinfo)
Loading

0 comments on commit e6bfbdf

Please sign in to comment.