Skip to content
This repository has been archived by the owner on Apr 3, 2018. It is now read-only.

Commit

Permalink
Networking: Add support for multi-queue macvtap
Browse files Browse the repository at this point in the history
Provide multiple methods to connect the Virtual machine
to the container network. The current implementation allows
this to be chosen at a node level. In the future we can
enhance this to be dynamic, where the container interface
type is used to determine the optimal interconnection method.

Add support for multi-queue macvtap as an alternate means to
connect the container network interface to the virtual machine.

Signed-off-by: Manohar Castelino <[email protected]>
  • Loading branch information
mcastelino committed Oct 9, 2017
1 parent 1eeac22 commit d98bfe8
Show file tree
Hide file tree
Showing 2 changed files with 238 additions and 5 deletions.
223 changes: 219 additions & 4 deletions network.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,38 @@ import (
"golang.org/x/sys/unix"
)

// Introduces constants related to network routes.
// NetInterworkingModel identifies the method used to interconnect
// the container network interface and the virtual machine.
type NetInterworkingModel int

const (
// ModelBridged uses a linux bridge to interconnect
// the container interface to the VM. This is the
// safe choice that works for most cases except
// macvlan and ipvlan. It is also the zero value of
// NetInterworkingModel.
ModelBridged NetInterworkingModel = iota

// ModelMacVtap can be used when the container network
// interface can be bridged using macvtap.
ModelMacVtap

// ModelEnlightened can be used when the network plugins
// are enlightened to create VM native interfaces
// when requested by the runtime. xconnectVMNetwork
// currently rejects this model as unsupported.
ModelEnlightened
)

// DefaultNetInterworkingModel is a package level default
// that determines how the VM should be connected to
// the container network interface. xconnectVMNetwork
// reads it for every connect and disconnect request.
var DefaultNetInterworkingModel NetInterworkingModel = ModelMacVtap

// Constants related to networking.
const (
// defaultRouteDest is the catch-all IPv4 destination in CIDR form.
// NOTE(review): presumably used to recognise the default route - confirm
// against the route handling code, which is not part of this view.
defaultRouteDest = "0.0.0.0/0"
// defaultRouteLabel is the textual label paired with defaultRouteDest.
defaultRouteLabel = "default"
// defaultFilePerms is the permission argument passed to os.OpenFile
// when the macvtap device nodes are opened.
defaultFilePerms = 0600
// defaultQlen is the transmit queue length given to a new macvtap
// link when the requested link does not carry a positive TxQLen.
defaultQlen = 1500
)

type netIfaceAddrs struct {
Expand All @@ -47,12 +75,14 @@ type NetworkInterface struct {
HardAddr string
}

// NetworkInterfacePair defines a pair between TAP and virtual network interfaces.
// NetworkInterfacePair defines a pair between VM and virtual network interfaces.
type NetworkInterfacePair struct {
ID string
Name string
// VirtIface describes the container side interface; tapNetworkPair
// looks it up by name as a veth link.
VirtIface NetworkInterface
// TAPIface describes the interface handed to the VM. Its HardAddr is
// filled in with the MAC address the guest must use.
TAPIface NetworkInterface
// NetInterworkingModel records which interworking model was applied to
// this pair. It is set by xconnectVMNetwork and later used to select
// the QEMU network device type.
NetInterworkingModel
// VmFds holds the files opened on the macvtap device, one per queue.
// They are populated by tapNetworkPair and passed to QEMU as the
// network device FDs.
VmFds []*os.File
}

// NetworkConfig is the network configuration related to a network.
Expand Down Expand Up @@ -159,7 +189,7 @@ func runNetworkCommon(networkNSPath string, cb func() error) error {
func addNetworkCommon(pod Pod, networkNS *NetworkNamespace) error {
err := doNetNS(networkNS.NetNsPath, func(_ ns.NetNS) error {
for idx := range networkNS.Endpoints {
if err := bridgeNetworkPair(&(networkNS.Endpoints[idx].NetPair)); err != nil {
if err := xconnectVMNetwork(&(networkNS.Endpoints[idx].NetPair), true); err != nil {
return err
}
}
Expand All @@ -176,7 +206,7 @@ func addNetworkCommon(pod Pod, networkNS *NetworkNamespace) error {
func removeNetworkCommon(networkNS NetworkNamespace) error {
return doNetNS(networkNS.NetNsPath, func(_ ns.NetNS) error {
for _, endpoint := range networkNS.Endpoints {
err := unBridgeNetworkPair(endpoint.NetPair)
err := xconnectVMNetwork(&(endpoint.NetPair), false)
if err != nil {
return err
}
Expand All @@ -200,6 +230,21 @@ func createLink(netHandle *netlink.Handle, name string, expectedLink netlink.Lin
LinkAttrs: netlink.LinkAttrs{Name: name},
Mode: netlink.TUNTAP_MODE_TAP,
}
case (&netlink.Macvtap{}).Type():
qlen := expectedLink.Attrs().TxQLen
if qlen <= 0 {
qlen = defaultQlen
}
newLink = &netlink.Macvtap{
netlink.Macvlan{
Mode: netlink.MACVLAN_MODE_BRIDGE,
LinkAttrs: netlink.LinkAttrs{
Name: name,
TxQLen: qlen,
ParentIndex: expectedLink.Attrs().ParentIndex,
},
},
}
default:
return nil, fmt.Errorf("Unsupported link type %s", expectedLink.Type())
}
Expand Down Expand Up @@ -230,13 +275,154 @@ func getLinkByName(netHandle *netlink.Handle, name string, expectedLink netlink.
if l, ok := link.(*netlink.Veth); ok {
return l, nil
}
case (&netlink.Macvtap{}).Type():
if l, ok := link.(*netlink.Macvtap); ok {
return l, nil
}
default:
return nil, fmt.Errorf("Unsupported link type %s", expectedLink.Type())
}

return nil, fmt.Errorf("Incorrect link type %s, expecting %s", link.Type(), expectedLink.Type())
}

// xconnectVMNetwork connects (connect == true) or disconnects
// (connect == false) the VM and the container network interface described
// by netPair, using the model selected by DefaultNetInterworkingModel.
//
// For the bridged and macvtap models the chosen model is recorded in
// netPair.NetInterworkingModel before the operation is attempted. Any other
// model leaves netPair untouched and yields an error.
func xconnectVMNetwork(netPair *NetworkInterfacePair, connect bool) error {
	switch model := DefaultNetInterworkingModel; model {
	case ModelBridged:
		netPair.NetInterworkingModel = model
		if !connect {
			return unBridgeNetworkPair(*netPair)
		}
		return bridgeNetworkPair(netPair)

	case ModelMacVtap:
		netPair.NetInterworkingModel = model
		if !connect {
			return untapNetworkPair(*netPair)
		}
		return tapNetworkPair(netPair)

	case ModelEnlightened:
		// Recognised model, but no implementation exists yet.
		return fmt.Errorf("Unsupported networking model")
	}

	return fmt.Errorf("Invalid networking model")
}

// cleanupFds closes the first numFds entries of fds, or all of them when
// numFds exceeds len(fds). A zero or negative numFds closes nothing.
// Errors returned by Close are deliberately ignored: this is best-effort
// cleanup on a path that is already reporting another failure.
func cleanupFds(fds []*os.File, numFds int) {
	for idx, file := range fds {
		if idx >= numFds {
			break
		}
		_ = file.Close()
	}
}

// createMacvtapFds opens the macvtap device node /dev/tap<linkIndex> once
// per requested queue and returns the opened files in queue order.
//
// linkIndex is the interface index of the macvtap link and queues is the
// number of files to open. A queues value of zero returns an empty slice;
// a negative value is rejected with an error. If any open fails, the files
// opened so far are closed and the open error is returned unchanged.
func createMacvtapFds(linkIndex int, queues int) ([]*os.File, error) {
	// make panics on a negative length, so report it as an error instead.
	if queues < 0 {
		return nil, fmt.Errorf("Invalid number of macvtap queues %d", queues)
	}

	// The same device node is opened for every queue, so build the path once.
	tapDev := fmt.Sprintf("/dev/tap%d", linkIndex)

	fds := make([]*os.File, queues)

	//mq support
	for q := range fds {
		f, err := os.OpenFile(tapDev, os.O_RDWR, defaultFilePerms)
		if err != nil {
			// Only entries [0, q) have been populated at this point.
			cleanupFds(fds, q)
			return nil, err
		}
		fds[q] = f
	}

	return fds, nil
}

// tapNetworkPair connects the container veth interface named by
// netPair.VirtIface.Name to the VM through a macvtap link named
// netPair.TAPIface.Name.
//
// On success netPair.TAPIface.HardAddr holds the MAC address originally
// carried by the veth interface, and netPair.VmFds holds the files opened on
// the macvtap device. The veth interface itself is re-addressed with
// netPair.VirtIface.HardAddr.
//
// An error is returned as soon as any step fails; steps already performed
// are not rolled back.
func tapNetworkPair(netPair *NetworkInterfacePair) error {
	// We do not have global context here, hence a manifest constant
	// that matches our minimum vCPU configuration.
	// Ideally this would be the number of vCPUs of the VM, e.g.
	// createMacvtapFds(netdev.ID, int(config.SMP.CPUs)).
	// Another option is to defer this to the ciao qemu library which does
	// have global context but cannot handle errors when setting up the
	// network.
	const macvtapQueues = 2

	netHandle, err := netlink.NewHandle()
	if err != nil {
		return err
	}
	defer netHandle.Delete()

	vethLink, err := getLinkByName(netHandle, netPair.VirtIface.Name, &netlink.Veth{})
	if err != nil {
		return fmt.Errorf("Could not get veth interface: %s", err)
	}
	vethLinkAttrs := vethLink.Attrs()

	// Attach the macvtap interface to the underlying container
	// interface. Also picks relevant attributes from the parent.
	tapLink, err := createLink(netHandle, netPair.TAPIface.Name,
		&netlink.Macvtap{
			Macvlan: netlink.Macvlan{
				LinkAttrs: netlink.LinkAttrs{
					TxQLen:      vethLinkAttrs.TxQLen,
					ParentIndex: vethLinkAttrs.Index,
				},
			},
		})
	if err != nil {
		return fmt.Errorf("Could not create TAP interface: %s", err)
	}

	// Save the veth MAC address to the TAP so that it can later be used
	// to build the hypervisor command line. This MAC address has to be
	// the one inside the VM in order to avoid any firewall issues. The
	// bridge created by the network plugin on the host actually expects
	// to see traffic from this MAC address and not another one.
	tapHardAddr := vethLinkAttrs.HardwareAddr
	netPair.TAPIface.HardAddr = tapHardAddr.String()

	if err := netHandle.LinkSetMTU(tapLink, vethLinkAttrs.MTU); err != nil {
		return fmt.Errorf("Could not set TAP MTU %d: %s", vethLinkAttrs.MTU, err)
	}

	// Move the veth to the address configured in VirtIface before the TAP
	// is given the veth's original address.
	hardAddr, err := net.ParseMAC(netPair.VirtIface.HardAddr)
	if err != nil {
		return err
	}
	if err := netHandle.LinkSetHardwareAddr(vethLink, hardAddr); err != nil {
		return fmt.Errorf("Could not set MAC address %s for veth interface %s: %s",
			netPair.VirtIface.HardAddr, netPair.VirtIface.Name, err)
	}

	if err := netHandle.LinkSetUp(vethLink); err != nil {
		return fmt.Errorf("Could not enable veth %s: %s", netPair.VirtIface.Name, err)
	}

	if err := netHandle.LinkSetHardwareAddr(tapLink, tapHardAddr); err != nil {
		return fmt.Errorf("Could not set MAC address %s for TAP interface %s: %s",
			netPair.TAPIface.HardAddr, netPair.TAPIface.Name, err)
	}

	if err := netHandle.LinkSetUp(tapLink); err != nil {
		return fmt.Errorf("Could not enable TAP %s: %s", netPair.TAPIface.Name, err)
	}

	// Note: The underlying interfaces need to be up prior to fd creation.

	// Setup the multiqueue fds to be consumed by QEMU as macvtap cannot
	// be directly connected.
	netPair.VmFds, err = createMacvtapFds(tapLink.Attrs().Index, macvtapQueues)
	if err != nil {
		return fmt.Errorf("Could not setup macvtap fds %s: %s", netPair.TAPIface.Name, err)
	}

	return nil
}

func bridgeNetworkPair(netPair *NetworkInterfacePair) error {
netHandle, err := netlink.NewHandle()
if err != nil {
Expand Down Expand Up @@ -307,6 +493,35 @@ func bridgeNetworkPair(netPair *NetworkInterfacePair) error {
return nil
}

// untapNetworkPair undoes tapNetworkPair: it deletes the macvtap link named
// netPair.TAPIface.Name and then brings down the veth interface named
// netPair.VirtIface.Name.
//
// A missing veth interface is only logged, since the veth pair is not
// entirely managed by virtcontainers. Every other failure is returned.
func untapNetworkPair(netPair NetworkInterfacePair) error {
	netHandle, err := netlink.NewHandle()
	if err != nil {
		return err
	}
	defer netHandle.Delete()

	// The link was created as a macvtap by tapNetworkPair, so it must be
	// looked up as one; getLinkByName rejects a link whose concrete type
	// does not match the expected type.
	tapLink, err := getLinkByName(netHandle, netPair.TAPIface.Name, &netlink.Macvtap{})
	if err != nil {
		return fmt.Errorf("Could not get TAP interface: %s", err)
	}

	if err := netHandle.LinkDel(tapLink); err != nil {
		return fmt.Errorf("Could not remove TAP %s: %s", netPair.TAPIface.Name, err)
	}

	vethLink, err := getLinkByName(netHandle, netPair.VirtIface.Name, &netlink.Veth{})
	if err != nil {
		// The veth pair is not totally managed by virtcontainers
		// NOTE(review): Warn is handed a printf-style format string; if
		// virtLog does not interpret format verbs this should be Warnf - confirm.
		virtLog.Warn("Could not get veth interface: %s", err)
	} else {
		if err := netHandle.LinkSetDown(vethLink); err != nil {
			return fmt.Errorf("Could not disable veth %s: %s", netPair.VirtIface.Name, err)
		}
	}

	return nil
}

func unBridgeNetworkPair(netPair NetworkInterfacePair) error {
netHandle, err := netlink.NewHandle()
if err != nil {
Expand Down
20 changes: 19 additions & 1 deletion qemu.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,11 +292,28 @@ func (q *qemu) appendSocket(devices []ciaoQemu.Device, socket Socket) []ciaoQemu
return devices
}

// networkModelToQemuType translates a network interworking model into the
// QEMU network device type used to build the hypervisor command line.
// ModelMacVtap maps to MACVTAP; every other model maps to TAP.
func networkModelToQemuType(model NetInterworkingModel) ciaoQemu.NetDeviceType {
	if model == ModelMacVtap {
		return ciaoQemu.MACVTAP
	}

	// ModelEnlightened is not handled specially yet.
	// Here the Network plugin will create a VM native interface
	// which could be MacVtap, IpVtap, SRIOV, veth-tap, vhost-user
	// In these cases we will determine the interface type here
	// and pass in the native interface through

	// ModelBridged uses TAP, and TAP should work for most other cases
	return ciaoQemu.TAP
}

func (q *qemu) appendNetworks(devices []ciaoQemu.Device, endpoints []Endpoint) []ciaoQemu.Device {
for idx, endpoint := range endpoints {
devices = append(devices,
ciaoQemu.NetDevice{
Type: ciaoQemu.TAP,
Type: networkModelToQemuType(endpoint.NetPair.NetInterworkingModel),
Driver: ciaoQemu.VirtioNetPCI,
ID: fmt.Sprintf("network-%d", idx),
IFName: endpoint.NetPair.TAPIface.Name,
Expand All @@ -305,6 +322,7 @@ func (q *qemu) appendNetworks(devices []ciaoQemu.Device, endpoints []Endpoint) [
Script: "no",
VHost: true,
DisableModern: q.nestedRun,
FDs: endpoint.NetPair.VmFds,
},
)
}
Expand Down

0 comments on commit d98bfe8

Please sign in to comment.