From 0f8896589f9c0012b1fc0a6ffb6d06f38616d4cd Mon Sep 17 00:00:00 2001 From: Lucas Manning Date: Fri, 31 Jan 2025 12:22:46 -0800 Subject: [PATCH] Implement the PacketMMapEndpoint interface for PACKET_MMAP. This code is tested in the subsequent change with syscall tests. PiperOrigin-RevId: 721861305 --- pkg/sentry/socket/netstack/packetmmap/BUILD | 37 ++ .../socket/netstack/packetmmap/endpoint.go | 399 ++++++++++++++++++ .../netstack/packetmmap/endpoint_state.go | 31 ++ .../socket/netstack/packetmmap/ring_buffer.go | 246 +++++++++++ 4 files changed, 713 insertions(+) create mode 100644 pkg/sentry/socket/netstack/packetmmap/BUILD create mode 100644 pkg/sentry/socket/netstack/packetmmap/endpoint.go create mode 100644 pkg/sentry/socket/netstack/packetmmap/endpoint_state.go create mode 100644 pkg/sentry/socket/netstack/packetmmap/ring_buffer.go diff --git a/pkg/sentry/socket/netstack/packetmmap/BUILD b/pkg/sentry/socket/netstack/packetmmap/BUILD new file mode 100644 index 0000000000..5eef25b6a5 --- /dev/null +++ b/pkg/sentry/socket/netstack/packetmmap/BUILD @@ -0,0 +1,37 @@ +load("//tools:defs.bzl", "go_library") + +package( + default_applicable_licenses = ["//:license"], + licenses = ["notice"], +) + +go_library( + name = "packetmmap", + srcs = [ + "endpoint.go", + "endpoint_state.go", + "ring_buffer.go", + ], + visibility = [ + "//visibility:public", + ], + deps = [ + "//pkg/abi/linux", + "//pkg/atomicbitops", + "//pkg/bitmap", + "//pkg/buffer", + "//pkg/context", + "//pkg/errors/linuxerr", + "//pkg/hostarch", + "//pkg/safemem", + "//pkg/sentry/memmap", + "//pkg/sentry/pgalloc", + "//pkg/sentry/socket", + "//pkg/sentry/usage", + "//pkg/sync", + "//pkg/tcpip", + "//pkg/tcpip/header", + "//pkg/tcpip/stack", + "//pkg/waiter", + ], +) diff --git a/pkg/sentry/socket/netstack/packetmmap/endpoint.go b/pkg/sentry/socket/netstack/packetmmap/endpoint.go new file mode 100644 index 0000000000..9e779ab68f --- /dev/null +++ b/pkg/sentry/socket/netstack/packetmmap/endpoint.go @@ -0,0 +1,399 @@ +// Copyright 2025 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package packetmmap contains the packet mmap implementation for netstack. +// +// See https://docs.kernel.org/networking/packet_mmap.html for a full +// description of the PACKET_MMAP interface. +package packetmmap + +import ( + "fmt" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/atomicbitops" + "gvisor.dev/gvisor/pkg/buffer" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/hostarch" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" + "gvisor.dev/gvisor/pkg/waiter" +) + +var _ stack.PacketMMapEndpoint = (*Endpoint)(nil) +var _ memmap.Mappable = (*Endpoint)(nil) + +// ringBufferMode is the mode of a packet ring buffer. 
+type ringBufferMode uint
+
+const (
+	rxRingBuffer ringBufferMode = 1 << iota
+	txRingBuffer
+)
+
+// Endpoint is a memmap.Mappable implementation for stack.PacketMMapEndpoint.
+// It implements the PACKET_MMAP interface as described in
+// https://docs.kernel.org/networking/packet_mmap.html.
+//
+// +stateify savable
+type Endpoint struct {
+	// mu protects mutable state within rxRingBuffer and txRingBuffer; see
+	// the ringBuffer type for details.
+	mu           sync.Mutex `state:"nosave"`
+	rxRingBuffer ringBuffer
+	txRingBuffer ringBuffer
+
+	mapped atomicbitops.Uint32
+
+	cooked    bool
+	packetEP  stack.MappablePacketEndpoint
+	mode      ringBufferMode
+	nicID     tcpip.NICID
+	netProto  tcpip.NetworkProtocolNumber
+	headerLen uint32
+
+	stack *stack.Stack
+	stats *tcpip.TransportEndpointStats
+	wq    *waiter.Queue
+
+	mappingsMu sync.Mutex `state:"nosave"`
+	// +checklocks:mappingsMu
+	mappings memmap.MappingSet
+}
+
+// Init initializes the endpoint. It is called when the endpoint is created
+// during setsockopt(PACKET_(RX|TX)_RING) with the options retrieved from its
+// corresponding packet socket.
+func (m *Endpoint) Init(ctx context.Context, opts stack.PacketMMapOpts) error {
+	m.stack = opts.Stack
+	m.wq = opts.Wq
+	m.cooked = opts.Cooked
+	m.packetEP = opts.PacketEndpoint
+	m.stats = opts.Stats
+	m.nicID = opts.NICID
+	m.netProto = opts.NetProto
+	m.headerLen = linux.TPACKET_HDRLEN
+	if opts.Req.TpBlockNr != 0 {
+		if opts.Req.TpBlockSize <= 0 {
+			return linuxerr.EINVAL
+		}
+		if opts.Req.TpBlockSize%hostarch.PageSize != 0 {
+			return linuxerr.EINVAL
+		}
+		if opts.Req.TpFrameSize < m.headerLen {
+			return linuxerr.EINVAL
+		}
+		if opts.Req.TpFrameSize&(linux.TPACKET_ALIGNMENT-1) != 0 {
+			return linuxerr.EINVAL
+		}
+		framesPerBlock := opts.Req.TpBlockSize / opts.Req.TpFrameSize
+		if framesPerBlock == 0 {
+			return linuxerr.EINVAL
+		}
+		if framesPerBlock > ^uint32(0)/opts.Req.TpFrameSize {
+			return linuxerr.EINVAL
+		}
+		if framesPerBlock*opts.Req.TpBlockNr != opts.Req.TpFrameNr {
+			return linuxerr.EINVAL
+		}
+	} else if opts.Req.TpFrameNr != 0 {
+		return linuxerr.EINVAL
+	}
+	if opts.IsRx {
+		if err := m.rxRingBuffer.init(ctx, opts.Req); err != nil {
+			return err
+		}
+		m.mode |= rxRingBuffer
+	} else {
+		if err := m.txRingBuffer.init(ctx, opts.Req); err != nil {
+			return err
+		}
+		m.mode |= txRingBuffer
+	}
+	return nil
+}
+
+// Close implements stack.PacketMMapEndpoint.Close.
+func (m *Endpoint) Close() {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	if m.mode&rxRingBuffer != 0 {
+		m.rxRingBuffer.destroy()
+	}
+	if m.mode&txRingBuffer != 0 {
+		m.txRingBuffer.destroy()
+	}
+	m.mapped.Store(0)
+}
+
+// Readiness implements stack.PacketMMapEndpoint.Readiness.
+func (m *Endpoint) Readiness(mask waiter.EventMask) waiter.EventMask {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	result := waiter.WritableEvents & mask
+	if m.mode&rxRingBuffer != 0 {
+		st, err := m.rxRingBuffer.prevFrameStatus()
+		if err != nil {
+			return result
+		}
+		if st != linux.TP_STATUS_KERNEL {
+			result |= waiter.ReadableEvents
+		}
+	}
+	return result
+}
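+
+// For illustration only, a ring geometry that passes the validation in Init
+// above (the values are assumed for this example and are not taken from the
+// tests that accompany this change):
+//
+//	req := tcpip.TpacketReq{
+//		TpBlockSize: 4096, // one page; must be page-aligned
+//		TpFrameSize: 2048, // TPACKET_ALIGNMENT-aligned and >= TPACKET_HDRLEN
+//		TpBlockNr:   2,
+//		TpFrameNr:   4,
+//	}
+//
+// framesPerBlock = 4096/2048 = 2, and framesPerBlock*TpBlockNr = 4 matches
+// TpFrameNr, so Init accepts the request and backs the ring with a single
+// 8KiB region holding four 2KiB frames.
+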
+// HandlePacket implements stack.PacketMMapEndpoint.HandlePacket.
+func (m *Endpoint) HandlePacket(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, pkt *stack.PacketBuffer) {
+	const minMacLen = 16
+	var (
+		status                           = uint32(linux.TP_STATUS_USER)
+		macOffset, netOffset, dataLength uint32
+		clone                            *stack.PacketBuffer
+	)
+
+	m.mu.Lock()
+	if !m.rxRingBuffer.hasRoom() {
+		m.mu.Unlock()
+		m.stack.Stats().DroppedPackets.Increment()
+		return
+	}
+	m.mu.Unlock()
+
+	if pkt.GSOOptions.Type != stack.GSONone && pkt.GSOOptions.NeedsCsum {
+		status |= linux.TP_STATUS_CSUM_NOT_READY
+	}
+	if pkt.GSOOptions.Type == stack.GSOTCPv4 || pkt.GSOOptions.Type == stack.GSOTCPv6 {
+		status |= linux.TP_STATUS_GSO_TCP
+	}
+
+	pktBuf := pkt.ToBuffer()
+	if m.cooked {
+		// Cooked packet endpoints don't include the link headers in received
+		// packets.
+		pktBuf.TrimFront(int64(len(pkt.LinkHeader().Slice()) + len(pkt.VirtioNetHeader().Slice())))
+		netOffset = linux.TPacketAlign(m.headerLen + minMacLen)
+		macOffset = netOffset
+	} else {
+		virtioNetHdrLen := uint32(len(pkt.VirtioNetHeader().Slice()))
+		macLen := uint32(len(pkt.LinkHeader().Slice())) + virtioNetHdrLen
+		netOffset = linux.TPacketAlign(m.headerLen + macLen)
+		if macLen < minMacLen {
+			netOffset = linux.TPacketAlign(m.headerLen + minMacLen)
+		}
+		if virtioNetHdrLen > 0 {
+			netOffset += virtioNetHdrLen
+		}
+		macOffset = netOffset - macLen
+	}
+	if netOffset > uint32(^uint16(0)) {
+		m.stack.Stats().DroppedPackets.Increment()
+		return
+	}
+	dataLength = uint32(pktBuf.Size())
+
+	// If the packet is too large to fit in a frame, truncate the data written
+	// to the ring and deliver an untruncated copy to the receive queue.
+	if macOffset+dataLength > m.rxRingBuffer.frameSize {
+		clone = pkt.Clone()
+		defer clone.DecRef()
+		// Guard the subtraction against unsigned underflow; macOffset may
+		// exceed the frame size.
+		if macOffset > m.rxRingBuffer.frameSize {
+			dataLength = 0
+		} else {
+			dataLength = m.rxRingBuffer.frameSize - macOffset
+		}
+	}
+
+	m.mu.Lock()
+	tpStatus, err := m.rxRingBuffer.currFrameStatus()
+	if err != nil || tpStatus != linux.TP_STATUS_KERNEL {
+		m.mu.Unlock()
+		m.stack.Stats().DroppedPackets.Increment()
+		return
+	}
+
+	slot, ok := m.rxRingBuffer.testAndMarkHead()
+	if !ok {
+		m.mu.Unlock()
+		m.stack.Stats().DroppedPackets.Increment()
+		return
+	}
+	m.rxRingBuffer.incHead()
+
+	if clone != nil {
+		status |= linux.TP_STATUS_COPY
+		m.packetEP.HandlePacketMMapCopy(nicID, netProto, clone)
+	}
+	m.mu.Unlock()
+
+	// The mutex is not held while writing to the internal mappings so that
+	// other threads may write to the ring buffer concurrently.
+	hdrView := buffer.NewViewSize(int(macOffset))
+	m.marshalFrameHeader(pktBuf, macOffset, netOffset, dataLength, hdrView)
+	pktBuf.Truncate(int64(dataLength))
+	m.marshalSockAddr(pkt, hdrView)
+
+	if err := m.rxRingBuffer.writeFrame(slot, hdrView, pktBuf); err != nil {
+		m.stack.Stats().DroppedPackets.Increment()
+		return
+	}
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	if err := m.rxRingBuffer.writeStatus(slot, status); err != nil {
+		m.stack.Stats().DroppedPackets.Increment()
+		return
+	}
+	m.stats.PacketsReceived.Increment()
+	m.wq.Notify(waiter.ReadableEvents)
+}
+
+// AddMapping implements memmap.Mappable.AddMapping.
+func (m *Endpoint) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
+	m.mappingsMu.Lock()
+	defer m.mappingsMu.Unlock()
+	m.mappings.AddMapping(ms, ar, offset, writable)
+	return nil
+}
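+
+// For reference, the layout HandlePacket produces within a single frame
+// (offsets are illustrative; the exact values depend on TPACKET_HDRLEN and
+// the link-header length, and macOffset == netOffset for cooked sockets):
+//
+//	0 ............................... tpacket_hdr (status, lengths, offsets)
+//	TPacketAlign(hdrSize) ........... sockaddr_ll (see marshalSockAddr)
+//	macOffset (netOffset - macLen) .. link-layer header
+//	netOffset ....................... packet data, dataLength bytes
+
+// RemoveMapping implements memmap.Mappable.RemoveMapping.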
+func (m *Endpoint) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) { + m.mappingsMu.Lock() + defer m.mappingsMu.Unlock() + m.mappings.RemoveMapping(ms, ar, offset, writable) +} + +// CopyMapping implements memmap.Mappable.CopyMapping. +func (m *Endpoint) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error { + m.mappingsMu.Lock() + defer m.mappingsMu.Unlock() + m.mappings.AddMapping(ms, dstAR, offset, writable) + return nil +} + +// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. +func (*Endpoint) InvalidateUnsavable(context.Context) error { + return nil +} + +// Translate implements memmap.Mappable.Translate. +func (m *Endpoint) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { + translationSize := 0 + if m.mode&rxRingBuffer != 0 { + translationSize++ + } + if m.mode&txRingBuffer != 0 { + translationSize++ + } + + ts := make([]memmap.Translation, 0, translationSize) + var err error + + if m.mode&rxRingBuffer != 0 { + var rxTranslation memmap.Translation + rxTranslation, err = m.rxRingBuffer.Translate(ctx, required, optional, at) + ts = append(ts, rxTranslation) + } + if m.mode&txRingBuffer != 0 { + // Translate went outside the bounds of the RX ring buffer, which is valid + // if there is also a TX ring buffer. + if err != nil { + if len(ts) > 0 { + required.Start = ts[len(ts)-1].Source.End + optional.Start = ts[len(ts)-1].Source.End + } + } + var txTranslation memmap.Translation + txTranslation, err = m.txRingBuffer.Translate(ctx, required, optional, at) + ts = append(ts, txTranslation) + } + return ts, err +} + +// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. +func (m *Endpoint) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { + if opts.Offset != 0 { + return linuxerr.EINVAL + } + var size uint64 + if m.mode&rxRingBuffer != 0 { + size += m.rxRingBuffer.bufferSize() + } + if m.mode&txRingBuffer != 0 { + size += m.txRingBuffer.bufferSize() + } + if size != opts.Length { + return linuxerr.EINVAL + } + m.mapped.Store(1) + return nil +} + +// Mapped returns whether the endpoint has been mapped. 
+func (m *Endpoint) Mapped() bool { + return m.mapped.Load() != 0 +} + +func toLinuxPacketType(pktType tcpip.PacketType) uint8 { + switch pktType { + case tcpip.PacketHost: + return linux.PACKET_HOST + case tcpip.PacketOtherHost: + return linux.PACKET_OTHERHOST + case tcpip.PacketOutgoing: + return linux.PACKET_OUTGOING + case tcpip.PacketBroadcast: + return linux.PACKET_BROADCAST + case tcpip.PacketMulticast: + return linux.PACKET_MULTICAST + default: + panic(fmt.Sprintf("unknown packet type: %d", pktType)) + } +} + +func (m *Endpoint) marshalSockAddr(pkt *stack.PacketBuffer, view *buffer.View) { + var sll linux.SockAddrLink + sll.Family = linux.AF_PACKET + sll.Protocol = socket.Htons(uint16(m.netProto)) + sll.PacketType = toLinuxPacketType(pkt.PktType) + sll.InterfaceIndex = int32(m.nicID) + sll.HardwareAddrLen = header.EthernetAddressSize + + if len(pkt.LinkHeader().Slice()) != 0 { + hdr := header.Ethernet(pkt.LinkHeader().Slice()) + copy(sll.HardwareAddr[:], hdr.SourceAddress()) + } + hdrSize := uint32((*linux.TpacketHdr)(nil).SizeBytes()) + sll.MarshalBytes(view.AsSlice()[linux.TPacketAlign(hdrSize):]) +} + +func (m *Endpoint) marshalFrameHeader(pktBuf buffer.Buffer, macOffset, netOffset, dataLength uint32, view *buffer.View) { + t := m.stack.Clock().Now() + hdr := linux.TpacketHdr{ + // The status is set separately to ensure the frame is written before the + // status is set. + TpStatus: linux.TP_STATUS_KERNEL, + TpLen: uint32(pktBuf.Size()), + TpSnaplen: dataLength, + TpMac: uint16(macOffset), + TpNet: uint16(netOffset), + TpSec: uint32(t.Unix()), + TpUsec: uint32(t.UnixMicro() % 1e6), + } + hdr.MarshalBytes(view.AsSlice()) +} diff --git a/pkg/sentry/socket/netstack/packetmmap/endpoint_state.go b/pkg/sentry/socket/netstack/packetmmap/endpoint_state.go new file mode 100644 index 0000000000..d716012691 --- /dev/null +++ b/pkg/sentry/socket/netstack/packetmmap/endpoint_state.go @@ -0,0 +1,31 @@ +// Copyright 2025 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package packetmmap + +import ( + "context" + "fmt" + + "gvisor.dev/gvisor/pkg/sentry/pgalloc" +) + +// afterLoad is invoked by stateify. +func (rb *ringBuffer) afterLoad(ctx context.Context) { + mf := pgalloc.MemoryFileFromContext(ctx) + if mf == nil { + panic(fmt.Sprintf("context.Context %T lacks non-nil value for key %T", ctx, pgalloc.CtxMemoryFile)) + } + rb.mf = mf +} diff --git a/pkg/sentry/socket/netstack/packetmmap/ring_buffer.go b/pkg/sentry/socket/netstack/packetmmap/ring_buffer.go new file mode 100644 index 0000000000..86d7664096 --- /dev/null +++ b/pkg/sentry/socket/netstack/packetmmap/ring_buffer.go @@ -0,0 +1,246 @@ +// Copyright 2025 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package packetmmap
+
+import (
+	"fmt"
+	"io"
+
+	"gvisor.dev/gvisor/pkg/abi/linux"
+	"gvisor.dev/gvisor/pkg/bitmap"
+	"gvisor.dev/gvisor/pkg/buffer"
+	"gvisor.dev/gvisor/pkg/context"
+	"gvisor.dev/gvisor/pkg/hostarch"
+	"gvisor.dev/gvisor/pkg/safemem"
+	"gvisor.dev/gvisor/pkg/sentry/memmap"
+	"gvisor.dev/gvisor/pkg/sentry/pgalloc"
+	"gvisor.dev/gvisor/pkg/sentry/usage"
+	"gvisor.dev/gvisor/pkg/sync"
+	"gvisor.dev/gvisor/pkg/tcpip"
+)
+
+// +stateify savable
+type ringBuffer struct {
+	framesPerBlock uint32
+	frameSize      uint32
+	frameMax       uint32
+	blockSize      uint32
+	numBlocks      uint32
+	version        int
+
+	// The following fields are protected by the owning endpoint's mutex.
+	head       uint32
+	rxOwnerMap bitmap.Bitmap
+
+	dataMu sync.RWMutex `state:"nosave"`
+	// +checklocks:dataMu
+	size uint64
+	// +checklocks:dataMu
+	mapping memmap.MappableRange
+	data    memmap.FileRange
+
+	mf *pgalloc.MemoryFile `state:"nosave"`
+}
+
+// init initializes the ring buffer from the given TPACKET_(RX|TX)_RING
+// request.
+//
+// The owning endpoint must be locked when calling this function.
+func (rb *ringBuffer) init(ctx context.Context, req *tcpip.TpacketReq) error {
+	rb.blockSize = req.TpBlockSize
+	rb.framesPerBlock = req.TpBlockSize / req.TpFrameSize
+	rb.frameMax = req.TpFrameNr - 1
+	rb.frameSize = req.TpFrameSize
+	rb.numBlocks = req.TpBlockNr
+
+	rb.rxOwnerMap = bitmap.New(req.TpFrameNr)
+	rb.head = 0
+
+	rb.dataMu.Lock()
+	defer rb.dataMu.Unlock()
+	rb.size = uint64(req.TpBlockSize) * uint64(req.TpBlockNr)
+	mf := pgalloc.MemoryFileFromContext(ctx)
+	if mf == nil {
+		panic(fmt.Sprintf("context.Context %T lacks non-nil value for key %T", ctx, pgalloc.CtxMemoryFile))
+	}
+	rb.mf = mf
+	fr, err := rb.mf.Allocate(rb.size, pgalloc.AllocOpts{Kind: usage.Anonymous, MemCgID: pgalloc.MemoryCgroupIDFromContext(ctx)})
+	if err != nil {
+		return err
+	}
+	rb.mapping = memmap.MappableRange{Start: 0, End: rb.size}
+	rb.data = fr
+	return nil
+}
+
+// destroy destroys the packet ring buffer.
+//
+// The owning endpoint must be locked when calling this function.
+func (rb *ringBuffer) destroy() {
+	rb.dataMu.Lock()
+	rb.mf.DecRef(rb.data)
+	rb.dataMu.Unlock()
+	*rb = ringBuffer{}
+}
+
+// Translate implements memmap.Mappable.Translate.
+func (rb *ringBuffer) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) (memmap.Translation, error) {
+	rb.dataMu.Lock()
+	defer rb.dataMu.Unlock()
+	var beyondEOF bool
+	if required.End > rb.size {
+		if required.Start >= rb.size {
+			return memmap.Translation{}, &memmap.BusError{Err: io.EOF}
+		}
+		beyondEOF = true
+		required.End = rb.size
+	}
+	if optional.End > rb.size {
+		optional.End = rb.size
+	}
+	mappableRange := rb.mapping.Intersect(optional)
+	ts := memmap.Translation{
+		Source: mappableRange,
+		File:   rb.mf,
+		Offset: rb.data.Start + (mappableRange.Start - rb.mapping.Start),
+		Perms:  hostarch.AnyAccess,
+	}
+	if beyondEOF {
+		return ts, &memmap.BusError{Err: io.EOF}
+	}
+	return ts, nil
+}
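+
+// A worked example of the translation above, with assumed numbers: for a
+// ring where size = 8192 and data = [D, D+8192) in the memory file, a
+// request for the mappable range [4096, 8192) yields Source = [4096, 8192)
+// and Offset = D + 4096. A required range extending past size is truncated
+// and additionally returns a BusError wrapping io.EOF, which
+// Endpoint.Translate uses as the signal to continue into the TX ring
+// buffer, if one is configured.
+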
+// writeStatus writes the status of a frame to the ring buffer's internal
+// mappings at the provided frame number. It also clears the owner-map bit
+// for the frame when the status being written includes TP_STATUS_USER.
+//
+// The owning endpoint must be locked when calling this method.
+func (rb *ringBuffer) writeStatus(frameNum uint32, status uint32) error {
+	if status&linux.TP_STATUS_USER != 0 {
+		rb.rxOwnerMap.Remove(frameNum)
+	}
+	ims, err := rb.internalMappingsForFrame(frameNum, hostarch.Write)
+	if err != nil {
+		return err
+	}
+	// Status is the first word in the frame. It is a uint64 in TPACKET_V1
+	// and a uint32 in TPACKET_V2; in practice the status never exceeds a
+	// uint32 for either version.
+	_, err = safemem.SwapUint32(ims.Head(), status)
+	return err
+}
+
+// writeFrame writes a frame to the ring buffer's internal mappings at the
+// provided frame number.
+func (rb *ringBuffer) writeFrame(frameNum uint32, hdrView *buffer.View, pkt buffer.Buffer) error {
+	ims, err := rb.internalMappingsForFrame(frameNum, hostarch.Write)
+	if err != nil {
+		return err
+	}
+	frame := buffer.MakeWithView(hdrView)
+	frame.Merge(&pkt)
+	br := frame.AsBufferReader()
+	defer br.Close()
+
+	rdr := safemem.FromIOReader{Reader: &br}
+	if _, err = rdr.ReadToBlocks(ims); err != nil {
+		return err
+	}
+	return nil
+}
+
+// incHead increments the head of the ring buffer.
+//
+// The owning endpoint must be locked when calling this method.
+func (rb *ringBuffer) incHead() {
+	if rb.head == rb.frameMax {
+		rb.head = 0
+	} else {
+		rb.head++
+	}
+}
+
+// currFrameStatus returns the status of the current frame.
+//
+// The owning endpoint must be locked when calling this method.
+func (rb *ringBuffer) currFrameStatus() (uint32, error) {
+	return rb.frameStatus(rb.head)
+}
+
+// prevFrameStatus returns the status of the frame before the current
+// frame.
+//
+// The owning endpoint must be locked when calling this method.
+func (rb *ringBuffer) prevFrameStatus() (uint32, error) {
+	prev := rb.frameMax
+	if rb.head > 0 {
+		prev = rb.head - 1
+	}
+	return rb.frameStatus(prev)
+}
+
+// testAndMarkHead tests whether the head slot is available and marks it
+// as owned if it is.
+//
+// The owning endpoint must be locked when calling this method.
+func (rb *ringBuffer) testAndMarkHead() (uint32, bool) {
+	if firstZero, err := rb.rxOwnerMap.FirstZero(rb.head); err != nil || firstZero != rb.head {
+		return 0, false
+	}
+	rb.rxOwnerMap.Add(rb.head)
+	return rb.head, true
+}
+
+// hasRoom returns true if the ring buffer has room for a new frame at head.
+//
+// The owning endpoint must be locked when calling this method.
+func (rb *ringBuffer) hasRoom() bool {
+	status, err := rb.currFrameStatus()
+	if err != nil {
+		return false
+	}
+	return status == linux.TP_STATUS_KERNEL
+}
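+
+// Taken together, the methods above implement the RX frame handoff: the
+// endpoint observes TP_STATUS_KERNEL at head (hasRoom/currFrameStatus),
+// reserves the slot in rxOwnerMap and advances head (testAndMarkHead,
+// incHead), fills the frame without holding the endpoint mutex
+// (writeFrame), and finally publishes TP_STATUS_USER (writeStatus), after
+// which the frame belongs to userspace until it stores TP_STATUS_KERNEL
+// back. This summary describes the code above; it adds no behavior.
+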
+// bufferSize returns the size of the ring buffer in bytes.
+func (rb *ringBuffer) bufferSize() uint64 {
+	rb.dataMu.RLock()
+	defer rb.dataMu.RUnlock()
+	return rb.size
+}
+
+// internalMappingsForFrame returns the sentry's internal mappings for the
+// frame at the given frame number.
+func (rb *ringBuffer) internalMappingsForFrame(frameNum uint32, at hostarch.AccessType) (safemem.BlockSeq, error) {
+	rb.dataMu.RLock()
+	defer rb.dataMu.RUnlock()
+
+	blockIdx := frameNum / rb.framesPerBlock
+	frameIdx := frameNum % rb.framesPerBlock
+
+	frameStart := rb.data.Start + (uint64(blockIdx) * uint64(rb.blockSize)) + (uint64(frameIdx) * uint64(rb.frameSize))
+	frameEnd := frameStart + uint64(rb.frameSize)
+
+	frameFR := memmap.FileRange{Start: frameStart, End: frameEnd}
+	return rb.mf.MapInternal(frameFR, at)
+}
+
+// frameStatus returns the status word of the frame at the given frame
+// number.
+func (rb *ringBuffer) frameStatus(frameNum uint32) (uint32, error) {
+	ims, err := rb.internalMappingsForFrame(frameNum, hostarch.Read)
+	if err != nil {
+		return 0, err
+	}
+	// Status is the first uint32 in the frame.
+	return safemem.LoadUint32(ims.Head())
+}
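+
+// A worked example of the frame indexing in internalMappingsForFrame, with
+// assumed geometry: for blockSize = 4096, frameSize = 2048 and
+// framesPerBlock = 2, frame number 3 falls in block 3/2 = 1 at frame
+// index 3%2 = 1, i.e. at byte offset 1*4096 + 1*2048 = 6144 from
+// data.Start in the ring's backing allocation.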