diff --git a/pkg/abi/nvgpu/frontend.go b/pkg/abi/nvgpu/frontend.go index 0d8dcb96c5..0f80110446 100644 --- a/pkg/abi/nvgpu/frontend.go +++ b/pkg/abi/nvgpu/frontend.go @@ -198,7 +198,7 @@ type NVOS02_PARAMETERS struct { Pad1 [4]byte } -// Bitfields in NVOS02Parameters.Flags: +// Bitfields in NVOS02_PARAMETERS.Flags: const ( NVOS02_FLAGS_ALLOC_SHIFT = 16 NVOS02_FLAGS_ALLOC_MASK = 0x3 @@ -470,6 +470,18 @@ type NVOS33_PARAMETERS struct { Flags uint32 } +// Bitfields in NVOS33_PARAMETERS.Flags: +const ( + NVOS33_FLAGS_CACHING_TYPE_SHIFT = 23 + NVOS33_FLAGS_CACHING_TYPE_MASK = 0x7 + NVOS33_FLAGS_CACHING_TYPE_CACHED = 0 + NVOS33_FLAGS_CACHING_TYPE_UNCACHED = 1 + NVOS33_FLAGS_CACHING_TYPE_WRITECOMBINED = 2 + NVOS33_FLAGS_CACHING_TYPE_WRITEBACK = 5 + NVOS33_FLAGS_CACHING_TYPE_DEFAULT = 6 + NVOS33_FLAGS_CACHING_TYPE_UNCACHED_WEAK = 7 +) + // NVOS34_PARAMETERS is the parameter type for NV_ESC_RM_UNMAP_MEMORY. // // +marshal diff --git a/pkg/hostarch/BUILD b/pkg/hostarch/BUILD index 3508c443fb..cde0764559 100644 --- a/pkg/hostarch/BUILD +++ b/pkg/hostarch/BUILD @@ -38,6 +38,7 @@ go_library( "hostarch.go", "hostarch_arm64.go", "hostarch_x86.go", + "memory_type.go", "sizes_util.go", ], visibility = ["//:sandbox"], diff --git a/pkg/hostarch/memory_type.go b/pkg/hostarch/memory_type.go new file mode 100644 index 0000000000..82d530b9b9 --- /dev/null +++ b/pkg/hostarch/memory_type.go @@ -0,0 +1,84 @@ +// Copyright 2025 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package hostarch + +import "fmt" + +// MemoryType specifies CPU memory access behavior. +type MemoryType uint8 + +const ( + // MemoryTypeWriteBack is equivalent to Linux's default pgprot, or the + // following architectural memory types: + // + // - x86: Write-back (WB) + // + // - ARM64: Normal write-back cacheable + // + // This memory type is appropriate for typical application memory and must + // be the zero value for MemoryType. + MemoryTypeWriteBack MemoryType = iota + + // MemoryTypeWriteCombine is equivalent to Linux's pgprot_writecombine(), + // or the following architectural memory types: + // + // - x86: Write-combining (WC) + // + // - ARM64: Normal non-cacheable + MemoryTypeWriteCombine + + // MemoryTypeUncached is equivalent to Linux's pgprot_noncached(), or the + // following architectural memory types: + // + // - x86: Strong Uncacheable (UC) or Uncacheable (UC-); these differ in + // that UC- may be "downgraded" to WC by a setting of WC or (Intel only) WP + // in MTRR or EPT/NPT, but gVisor does not use MTRRs and KVM never sets WC + // or WP in EPT/NPT. + // + // - ARM64: Device-nGnRnE + MemoryTypeUncached + + // NumMemoryTypes is the number of memory types. + NumMemoryTypes +) + +// String implements fmt.Stringer.String. +func (mt MemoryType) String() string { + switch mt { + case MemoryTypeWriteBack: + return "WriteBack" + case MemoryTypeWriteCombine: + return "WriteCombine" + case MemoryTypeUncached: + return "Uncached" + default: + return fmt.Sprintf("%d", mt) + } +} + +// ShortString returns a two-character string compactly representing the +// MemoryType. +func (mt MemoryType) ShortString() string { + switch mt { + case MemoryTypeWriteBack: + return "WB" + case MemoryTypeWriteCombine: + return "WC" + case MemoryTypeUncached: + return "UC" + default: + return fmt.Sprintf("%02d", mt) + } +} diff --git a/pkg/sentry/devices/nvproxy/frontend.go b/pkg/sentry/devices/nvproxy/frontend.go index 9e6a2d1804..171b320bdc 100644 --- a/pkg/sentry/devices/nvproxy/frontend.go +++ b/pkg/sentry/devices/nvproxy/frontend.go @@ -46,8 +46,12 @@ type frontendDevice struct { minor uint32 } +func (dev *frontendDevice) isCtlDevice() bool { + return dev.minor == nvgpu.NV_CONTROL_DEVICE_MINOR +} + func (dev *frontendDevice) basename() string { - if dev.minor == nvgpu.NV_CONTROL_DEVICE_MINOR { + if dev.isCtlDevice() { return "nvidiactl" } return fmt.Sprintf("nvidia%d", dev.minor) @@ -134,8 +138,9 @@ type frontendFD struct { // These fields are marked nosave since we do not automatically reinvoke // NV_ESC_RM_MAP_MEMORY after restore, so restored FDs have no // mmap_context. - mmapLength uint64 `state:"nosave"` - mmapInternal uintptr `state:"nosave"` + mmapLength uint64 `state:"nosave"` + mmapInternal uintptr `state:"nosave"` + mmapMemType hostarch.MemoryType `state:"nosave"` // clients are handles of clients owned by this frontendFD. clients is // protected by dev.nvp.objsMu. @@ -493,6 +498,7 @@ func rmAllocMemorySystem(fi *frontendIoctlState, ioctlParams *nvgpu.IoctlNVOS02P fi.fd.dev.nvp.objAdd(fi.ctx, ioctlParams.Params.HRoot, ioctlParams.Params.HObjectNew, ioctlParams.Params.HClass, &miscObject{}, ioctlParams.Params.HObjectParent) if createMmapCtx { mapFile.mmapLength = ioctlParams.Params.Limit + 1 + mapFile.mmapMemType = getMemoryType(fi.ctx, mapFile.dev, nvgpu.NVOS33_FLAGS_CACHING_TYPE_DEFAULT) } } fi.fd.dev.nvp.objsUnlock() @@ -1343,6 +1349,15 @@ func rmMapMemory(fi *frontendIoctlState) (uintptr, error) { } if ioctlParams.Params.Status == nvgpu.NV_OK { mapFile.mmapLength = ioctlParams.Params.Length + // src/nvidia/arch/nvalloc/unix/src/escape.c:RmIoctl() forces + // NVOS33_FLAGS_CACHING_TYPE_DEFAULT, but resMap implementations may + // override the "caching type", so in general the memory type depends + // on the mapped object. Conveniently, when this occurs, the caching + // type in pParms->flags must be updated for the call to + // rm_create_mmap_context(), and pParms is subsequently copied back out + // by kernel-open/nvidia/nv.c:nvidia_ioctl(), so we can get the final + // caching type from the updated ioctl params. + mapFile.mmapMemType = getMemoryType(fi.ctx, mapFile.dev, (ioctlParams.Params.Flags>>nvgpu.NVOS33_FLAGS_CACHING_TYPE_SHIFT)&nvgpu.NVOS33_FLAGS_CACHING_TYPE_MASK) } ioctlParams.FD = origFD diff --git a/pkg/sentry/devices/nvproxy/frontend_mmap.go b/pkg/sentry/devices/nvproxy/frontend_mmap.go index 8f15a2c490..241d0c9982 100644 --- a/pkg/sentry/devices/nvproxy/frontend_mmap.go +++ b/pkg/sentry/devices/nvproxy/frontend_mmap.go @@ -15,8 +15,10 @@ package nvproxy import ( + "gvisor.dev/gvisor/pkg/abi/nvgpu" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/hostarch" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" ) @@ -75,6 +77,13 @@ func (mf *frontendFDMemmapFile) IncRef(fr memmap.FileRange, memCgID uint32) { func (mf *frontendFDMemmapFile) DecRef(fr memmap.FileRange) { } +// MemoryType implements memmap.File.MemoryType. +func (mf *frontendFDMemmapFile) MemoryType() hostarch.MemoryType { + mf.fd.mmapMu.Lock() + defer mf.fd.mmapMu.Unlock() + return mf.fd.mmapMemType +} + // DataFD implements memmap.File.DataFD. func (mf *frontendFDMemmapFile) DataFD(fr memmap.FileRange) (int, error) { return mf.FD(), nil @@ -84,3 +93,62 @@ func (mf *frontendFDMemmapFile) DataFD(fr memmap.FileRange) (int, error) { func (mf *frontendFDMemmapFile) FD() int { return int(mf.fd.hostFD) } + +func getMemoryType(ctx context.Context, mapDev *frontendDevice, cachingType uint32) hostarch.MemoryType { + // Compare kernel-open/nvidia/nv-mmap.c:nvidia_mmap_helper() => + // nv_encode_caching(). Each NVOS33_FLAGS_CACHING_TYPE_* corresponds + // directly to a NV_MEMORY_*; this is checked by asserts in + // src/nvidia/src/kernel/rmapi/mapping_cpu.c. + if !mapDev.isCtlDevice() { + // NOTE(gvisor.dev/issue/11436): In the !NV_IS_CTL_DEVICE() branch of + // nvidia_mmap_helper(), mmap_context->caching is only honored if + // IS_FB_OFFSET() and !IS_UD_OFFSET(). We can get the information we + // need for IS_FB_OFFSET() from NV_ESC_CARD_INFO, but there doesn't + // seem to be any way for us to replicate IS_UD_OFFSET(). So we must + // conservatively specify uncacheable, which applies in all other + // cases. This is unfortunate since it prevents us from using + // write-combining on framebuffer memory. Empirically, mappings of + // framebuffer memory seem to be fairly common, but none of our tests + // result in any IS_UD_OFFSET (USERD?) mappings. + if log.IsLogging(log.Debug) { + ctx.Debugf("nvproxy: inferred memory type %v for mapping of %s", hostarch.MemoryTypeUncached, mapDev.basename()) + } + return hostarch.MemoryTypeUncached + } + var memType hostarch.MemoryType + switch cachingType { + case nvgpu.NVOS33_FLAGS_CACHING_TYPE_CACHED, nvgpu.NVOS33_FLAGS_CACHING_TYPE_WRITEBACK: + // Note that nv_encode_caching() doesn't actually handle + // NV_MEMORY_WRITEBACK, so this case should fail during host mmap. + memType = hostarch.MemoryTypeWriteBack + case nvgpu.NVOS33_FLAGS_CACHING_TYPE_WRITECOMBINED, nvgpu.NVOS33_FLAGS_CACHING_TYPE_DEFAULT: + // NOTE(gvisor.dev/issue/11436): In the NV_IS_CTL_DEVICE() branch of + // nvidia_mmap_helper(), memory_type is never + // NV_MEMORY_TYPE_FRAMEBUFFER, so this corresponds to + // kernel-open/common/inc/nv-pgprot.h:NV_PGPROT_WRITE_COMBINED(). On + // ARM64, NV_PGPROT_WRITE_COMBINED() => NV_PGPROT_UNCACHED() implicitly + // uses MT_NORMAL (equivalent to our MemoryTypeWriteBack) rather than + // MT_NORMAL_NC when nvos_is_chipset_io_coherent() => + // PDB_PROP_CL_IS_CHIPSET_IO_COHERENT is true, which seems to be the + // case on most systems. We should clarify whether this is an + // optimization or required for correctness (cf. Armv8-M Architecture + // Reference Manual Sec. B7.16 "Mismatched memory attributes"), and + // subsequently whether to replicate it. + memType = hostarch.MemoryTypeWriteCombine + case nvgpu.NVOS33_FLAGS_CACHING_TYPE_UNCACHED, nvgpu.NVOS33_FLAGS_CACHING_TYPE_UNCACHED_WEAK: + // NOTE(gvisor.dev/issue/11436): On ARM64, nv_encode_caching() + // distinguishes between NV_PGPROT_UNCACHED() => MT_NORMAL/MT_NORMAL_NC + // and NV_PGPROT_UNCACHED_DEVICE() => MT_DEVICE_nGnRnE; in context, the + // former is used in the !peer_io (NV_MEMORY_TYPE_SYSTEM) case and the + // latter is used in the peer_io (NV_MEMORY_TYPE_DEVICE_MMIO) case. As + // above, we should clarify whether we need to replicate this behavior. + memType = hostarch.MemoryTypeUncached + default: + ctx.Warningf("nvproxy: unknown caching type %d", cachingType) + memType = hostarch.MemoryTypeUncached + } + if log.IsLogging(log.Debug) { + ctx.Debugf("nvproxy: inferred memory type %v for caching type %d", memType, cachingType) + } + return memType +} diff --git a/pkg/sentry/devices/nvproxy/uvm_mmap.go b/pkg/sentry/devices/nvproxy/uvm_mmap.go index f063b6c251..2241d4f37f 100644 --- a/pkg/sentry/devices/nvproxy/uvm_mmap.go +++ b/pkg/sentry/devices/nvproxy/uvm_mmap.go @@ -63,6 +63,8 @@ func (fd *uvmFD) InvalidateUnsavable(ctx context.Context) error { // +stateify savable type uvmFDMemmapFile struct { + memmap.DefaultMemoryType + fd *uvmFD } diff --git a/pkg/sentry/devices/tpuproxy/accel/accel_fd_mmap.go b/pkg/sentry/devices/tpuproxy/accel/accel_fd_mmap.go index ee06484856..b27eab9fbc 100644 --- a/pkg/sentry/devices/tpuproxy/accel/accel_fd_mmap.go +++ b/pkg/sentry/devices/tpuproxy/accel/accel_fd_mmap.go @@ -16,10 +16,7 @@ package accel import ( "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" ) @@ -61,7 +58,7 @@ func (fd *accelFD) InvalidateUnsavable(ctx context.Context) error { } type accelFDMemmapFile struct { - memmap.NoBufferedIOFallback + memmap.NoMapInternal fd *accelFD } @@ -74,12 +71,6 @@ func (mf *accelFDMemmapFile) IncRef(memmap.FileRange, uint32) { func (mf *accelFDMemmapFile) DecRef(fr memmap.FileRange) { } -// MapInternal implements memmap.File.MapInternal. -func (mf *accelFDMemmapFile) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) { - log.Traceback("accel: rejecting accelFDMemmapFile.MapInternal") - return safemem.BlockSeq{}, linuxerr.EINVAL -} - // DataFD implements memmap.File.DataFD. func (mf *accelFDMemmapFile) DataFD(fr memmap.FileRange) (int, error) { return mf.FD(), nil diff --git a/pkg/sentry/devices/tpuproxy/vfio/pci_device_fd_mmap.go b/pkg/sentry/devices/tpuproxy/vfio/pci_device_fd_mmap.go index 426804806f..8710b34984 100644 --- a/pkg/sentry/devices/tpuproxy/vfio/pci_device_fd_mmap.go +++ b/pkg/sentry/devices/tpuproxy/vfio/pci_device_fd_mmap.go @@ -72,6 +72,11 @@ func (fd *pciDeviceFD) InvalidateUnsavable(ctx context.Context) error { } type pciDeviceFdMemmapFile struct { + // FIXME(jamieliu): This is consistent with legacy behavior, but not + // clearly correct; drivers/vfio/pci/vfio_pci_core.c:vfio_pci_core_mmap() + // uses pgprot_noncached(), which would correspond to our + // MemoryTypeUncached. + memmap.DefaultMemoryType memmap.NoBufferedIOFallback fd *pciDeviceFD diff --git a/pkg/sentry/devices/tpuproxy/vfio/tpu_fd_mmap.go b/pkg/sentry/devices/tpuproxy/vfio/tpu_fd_mmap.go index 7e98dfa3bf..eb0729e855 100644 --- a/pkg/sentry/devices/tpuproxy/vfio/tpu_fd_mmap.go +++ b/pkg/sentry/devices/tpuproxy/vfio/tpu_fd_mmap.go @@ -16,10 +16,7 @@ package vfio import ( "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" ) @@ -61,7 +58,9 @@ func (fd *tpuFD) InvalidateUnsavable(ctx context.Context) error { } type tpuFDMemmapFile struct { - memmap.NoBufferedIOFallback + // FIXME(jamieliu): IIUC, tpuFD corresponds to Linux's + // drivers/vfio/vfio.c:vfio_group_fops, which does not support mmap at all. + memmap.NoMapInternal fd *tpuFD } @@ -74,12 +73,6 @@ func (mf *tpuFDMemmapFile) IncRef(memmap.FileRange, uint32) { func (mf *tpuFDMemmapFile) DecRef(fr memmap.FileRange) { } -// MapInternal implements memmap.File.MapInternal. -func (mf *tpuFDMemmapFile) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) { - log.Traceback("tpuproxy: rejecting tpuFdMemmapFile.MapInternal") - return safemem.BlockSeq{}, linuxerr.EINVAL -} - // DataFD implements memmap.File.DataFD. func (mf *tpuFDMemmapFile) DataFD(fr memmap.FileRange) (int, error) { return mf.FD(), nil diff --git a/pkg/sentry/devices/tpuproxy/vfio/vfio_fd_mmap.go b/pkg/sentry/devices/tpuproxy/vfio/vfio_fd_mmap.go index 361a0cc613..0e14b4c598 100644 --- a/pkg/sentry/devices/tpuproxy/vfio/vfio_fd_mmap.go +++ b/pkg/sentry/devices/tpuproxy/vfio/vfio_fd_mmap.go @@ -16,10 +16,7 @@ package vfio import ( "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/memmap" "gvisor.dev/gvisor/pkg/sentry/vfs" ) @@ -61,7 +58,7 @@ func (fd *vfioFD) InvalidateUnsavable(ctx context.Context) error { } type vfioFDMemmapFile struct { - memmap.NoBufferedIOFallback + memmap.NoMapInternal fd *vfioFD } @@ -74,12 +71,6 @@ func (mf *vfioFDMemmapFile) IncRef(memmap.FileRange, uint32) { func (mf *vfioFDMemmapFile) DecRef(fr memmap.FileRange) { } -// MapInternal implements memmap.File.MapInternal. -func (mf *vfioFDMemmapFile) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) { - log.Traceback("tpuproxy: rejecting vfioFdMemmapFile.MapInternal") - return safemem.BlockSeq{}, linuxerr.EINVAL -} - // DataFD implements memmap.File.DataFD. func (mf *vfioFDMemmapFile) DataFD(fr memmap.FileRange) (int, error) { return mf.FD(), nil diff --git a/pkg/sentry/fsimpl/erofs/regular_file.go b/pkg/sentry/fsimpl/erofs/regular_file.go index 6d5617153f..0dd37a095a 100644 --- a/pkg/sentry/fsimpl/erofs/regular_file.go +++ b/pkg/sentry/fsimpl/erofs/regular_file.go @@ -200,6 +200,7 @@ func (i *inode) InvalidateUnsavable(ctx context.Context) error { // +stateify savable type imageMemmapFile struct { + memmap.DefaultMemoryType memmap.NoBufferedIOFallback image *erofs.Image diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go index 42836a3761..f6b83f69a0 100644 --- a/pkg/sentry/fsimpl/gofer/regular_file.go +++ b/pkg/sentry/fsimpl/gofer/regular_file.go @@ -920,6 +920,7 @@ func (d *dentry) Evict(ctx context.Context, er pgalloc.EvictableRange) { // // +stateify savable type dentryPlatformFile struct { + memmap.DefaultMemoryType memmap.NoBufferedIOFallback *dentry diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go index 1a423f9c2e..d974e05f9b 100644 --- a/pkg/sentry/fsimpl/gofer/special_file.go +++ b/pkg/sentry/fsimpl/gofer/special_file.go @@ -43,6 +43,7 @@ import ( type specialFileFD struct { fileDescription specialFDEntry + memmap.DefaultMemoryType memmap.NoBufferedIOFallback // releaseMu synchronizes the closing of fd.handle with fd.sync(). It's safe diff --git a/pkg/sentry/fsimpl/kernfs/mmap_util.go b/pkg/sentry/fsimpl/kernfs/mmap_util.go index 85ca66bf09..cb01d194bb 100644 --- a/pkg/sentry/fsimpl/kernfs/mmap_util.go +++ b/pkg/sentry/fsimpl/kernfs/mmap_util.go @@ -28,6 +28,7 @@ import ( // // +stateify savable type inodePlatformFile struct { + memmap.DefaultMemoryType memmap.NoBufferedIOFallback // hostFD contains the host fd that this file was originally created from, diff --git a/pkg/sentry/memmap/BUILD b/pkg/sentry/memmap/BUILD index 66c9a4731f..d120e52e48 100644 --- a/pkg/sentry/memmap/BUILD +++ b/pkg/sentry/memmap/BUILD @@ -54,6 +54,7 @@ go_library( visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/context", + "//pkg/errors/linuxerr", "//pkg/hostarch", "//pkg/log", "//pkg/safemem", diff --git a/pkg/sentry/memmap/memmap.go b/pkg/sentry/memmap/memmap.go index f4f2226b1a..eb5b18bb4c 100644 --- a/pkg/sentry/memmap/memmap.go +++ b/pkg/sentry/memmap/memmap.go @@ -19,7 +19,9 @@ import ( "fmt" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/hostarch" + "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/safemem" ) @@ -470,15 +472,14 @@ type File interface { // reference is held on the mapped pages. MapInternal(fr FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) - // DataFD blocks until offsets fr in the file contain valid data, then - // returns the file descriptor represented by the File. - // - // Note that fr.Start and fr.End need not be page-aligned. + // MemoryType returns the memory type that must be used by page table + // entries mapping memory returned by MapInternal. Most implementations of + // File can embed DefaultMemoryType to obtain an appropriate implementation + // of MemoryType. // // Preconditions: - // * fr.Length() > 0. - // * At least one reference must be held on all pages in fr. - DataFD(fr FileRange) (int, error) + // * MapInternal() returned a non-empty BlockSeq. + MemoryType() hostarch.MemoryType // BufferReadAt reads len(dst) bytes from the file into dst, starting at // file offset off. It returns the number of bytes read. Like @@ -506,6 +507,16 @@ type File interface { // * At least one reference must be held on all written pages. BufferWriteAt(off uint64, src []byte) (uint64, error) + // DataFD blocks until offsets fr in the file contain valid data, then + // returns the file descriptor represented by the File. + // + // Note that fr.Start and fr.End need not be page-aligned. + // + // Preconditions: + // * fr.Length() > 0. + // * At least one reference must be held on all pages in fr. + DataFD(fr FileRange) (int, error) + // FD returns the file descriptor represented by the File. The returned // file descriptor should not be used to implement // platform.AddressSpace.MapFile, since the contents of the File may not be @@ -513,6 +524,15 @@ type File interface { FD() int } +// DefaultMemoryType implements File.MemoryType() for implementations of File +// backed by ordinary system memory. +type DefaultMemoryType struct{} + +// MemoryType implements File.MemoryType. +func (DefaultMemoryType) MemoryType() hostarch.MemoryType { + return hostarch.MemoryTypeWriteBack +} + // BufferedIOFallbackErr is returned (by value) by implementations of // File.MapInternal() that cannot succeed, but can still support memory-mapped // I/O by falling back to buffered reads and writes. @@ -538,6 +558,30 @@ func (NoBufferedIOFallback) BufferWriteAt(off uint64, src []byte) (uint64, error panic("unimplemented: memmap.File.MapInternal() should not have returned BufferedIOFallbackErr") } +// NoMapInternal implements File.MapInternal(), File.MemoryType(), +// File.BufferReadAt(), and File.BufferWriteAt() for implementations of File +// that do not support MapInternal. +type NoMapInternal struct { + NoBufferedIOFallback +} + +// MapInternal implements File.MapInternal. +func (NoMapInternal) MapInternal(fr FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) { + // There is no equivalent to this situation in Linux, and hence no clear + // errno to return. We choose ENODEV since mmap() returns this in a + // somewhat similar case (mmap() called on a non-mmappable file), and + // ENODEV is relatively uncommon (compared to e.g. EINVAL) so it should be + // somewhat more distinctive if it results in an application-reported + // error. + log.Traceback("no memmap.File.MapInternal implementation available, returning ENODEV") + return safemem.BlockSeq{}, linuxerr.ENODEV +} + +// MemoryType implements File.MemoryType. +func (NoMapInternal) MemoryType() hostarch.MemoryType { + panic("memmap.File.MemoryType called without MapInternal support") +} + // FileRange represents a range of uint64 offsets into a File. // // type FileRange diff --git a/pkg/sentry/mm/debug.go b/pkg/sentry/mm/debug.go index d927b17026..0e7fa82a9f 100644 --- a/pkg/sentry/mm/debug.go +++ b/pkg/sentry/mm/debug.go @@ -91,6 +91,6 @@ func (pseg pmaIterator) debugStringEntryLocked() []byte { b.WriteByte('s') } - fmt.Fprintf(&b, " %08x %T\n", pma.off, pma.file) + fmt.Fprintf(&b, " %s %08x %T\n", pma.file.MemoryType().ShortString(), pma.off, pma.file) return b.Bytes() } diff --git a/pkg/sentry/pgalloc/pgalloc.go b/pkg/sentry/pgalloc/pgalloc.go index e7d8f8dc6f..281389c97e 100644 --- a/pkg/sentry/pgalloc/pgalloc.go +++ b/pkg/sentry/pgalloc/pgalloc.go @@ -43,6 +43,7 @@ const pagesPerHugePage = hostarch.HugePageSize / hostarch.PageSize // MemoryFile is a memmap.File whose pages may be allocated to arbitrary // users. type MemoryFile struct { + memmap.DefaultMemoryType memmap.NoBufferedIOFallback // MemoryFile owns a single backing file. Each page in the backing file is