diff --git a/src/control/fault/code/codes.go b/src/control/fault/code/codes.go index bbbb5d76cdf..3982b29744e 100644 --- a/src/control/fault/code/codes.go +++ b/src/control/fault/code/codes.go @@ -1,5 +1,6 @@ // // (C) Copyright 2018-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -195,6 +196,7 @@ const ( ServerConfigScmDiffClass ServerConfigEngineBdevRolesMismatch ServerConfigSysRsvdZero + ServerConfigHugepagesDisabledWithNrSet ) // SPDK library bindings codes diff --git a/src/control/server/config/faults.go b/src/control/server/config/faults.go index c2f2063357a..9cbe0986de8 100644 --- a/src/control/server/config/faults.go +++ b/src/control/server/config/faults.go @@ -1,5 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -91,6 +92,11 @@ var ( "hugepages cannot be disabled if bdevs have been specified in config", "either set false (or remove) disable_hugepages parameter or remove nvme storage assignment in config and restart the control server", ) + FaultConfigHugepagesDisabledWithNrSet = serverConfigFault( + code.ServerConfigHugepagesDisabledWithNrSet, + "hugepages cannot be disabled if a non-zero nr_hugepages value has been specified in config", + "either set false (or remove) disable_hugepages parameter or remove nr_hugepages assignment in config and restart the control server", + ) FaultConfigControlMetadataNoPath = serverConfigFault( code.ServerConfigControlMetadataNoPath, "using a control_metadata device requires a path to use as the mount point", diff --git a/src/control/server/config/server.go b/src/control/server/config/server.go index 510e2d26d1b..01f2ccdcbd3 100644 --- a/src/control/server/config/server.go +++ b/src/control/server/config/server.go @@ -1,5 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. 
+// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -33,6 +34,14 @@ const ( defaultConfigPath = "../etc/daos_server.yml" ConfigOut = ".daos_server.active.yml" relConfExamplesPath = "../utils/config/examples/" + + // scanMinHugepageCount is the minimum number of hugepages to allocate in order to satisfy + // SPDK memory requirements when performing an NVMe device scan. + scanMinHugepageCount = 128 + // largeTargetCount is a large number of targets intended to satisfy most typical hugemem + // requirements based on a usual number of targets (16 per-engine on dual engine host). + largeTargetCount = 16 * 2 + largeSysXSCount = 2 ) // SupportConfig is defined here to avoid a import cycle @@ -478,15 +487,13 @@ func hugePageBytes(hpNr, hpSz int) uint64 { return uint64(hpNr*hpSz) * humanize.KiByte } -// SetNrHugepages calculates minimum based on total target count if using nvme. -func (cfg *Server) SetNrHugepages(log logging.Logger, mi *common.MemInfo) error { - var cfgTargetCount int - var sysXSCount int +// GetTgtCounts returns target count totals for a server config file. +func (cfg *Server) GetTgtCounts(log logging.Logger) (cfgTargetCount, sysXSCount int) { for idx, ec := range cfg.Engines { msg := fmt.Sprintf("engine %d fabric numa %d, storage numa %d", idx, ec.Fabric.NumaNodeIndex, ec.Storage.NumaNodeIndex) - // Calculate overall target count if NVMe is enabled. + // Calculate overall target count if bdevs exist in config. 
if ec.Storage.Tiers.HaveBdevs() { cfgTargetCount += ec.TargetCount if ec.Storage.Tiers.HasBdevRoleMeta() { @@ -502,37 +509,112 @@ func (cfg *Server) SetNrHugepages(log logging.Logger, mi *common.MemInfo) error log.Debug(msg) } - if cfgTargetCount <= 0 { - return nil // no nvme, no hugepages required - } + return +} - if cfg.DisableHugepages { - return FaultConfigHugepagesDisabledWithBdevs +func (cfg *Server) getMinMaxNrHugepages(log logging.Logger, hpSizeKiB int) (int, int, error) { + cfgTargetCount, sysXSCount := cfg.GetTgtCounts(log) + + if cfgTargetCount == 0 { + return 0, 0, nil } // Calculate minimum number of hugepages for all configured engines. - minHugepages, err := storage.CalcMinHugepages(mi.HugepageSizeKiB, cfgTargetCount+sysXSCount) + minHugepages, err := storage.CalcMinHugepages(hpSizeKiB, cfgTargetCount+sysXSCount) + if err != nil { + return 0, 0, err + } + + maxTgtCount := largeTargetCount + if sysXSCount > 0 { + maxTgtCount += largeSysXSCount + } + maxHugepages, err := storage.CalcMinHugepages(hpSizeKiB, maxTgtCount) + if err != nil { + return 0, 0, err + } + + var msgSysXS string + if sysXSCount > 0 { + msgSysXS = fmt.Sprintf(" and %d/%d sys-xstreams", sysXSCount, largeSysXSCount) + } + log.Tracef("calculated min/max %d/%d nr_hugepages based on %d/%d targets%s", + minHugepages, maxHugepages, cfgTargetCount, largeTargetCount, msgSysXS) + + if minHugepages > maxHugepages { + log.Debugf("config hugepage requirements exceed normal maximum") + maxHugepages = minHugepages + } + + return minHugepages, maxHugepages, nil +} + +// SetNrHugepages calculates minimum based on total target count if using nvme. If cfg.NrHugepages +// is set to zero, no hugepage allocation requests will be made to the kernel during service +// start-up in prepBdevStorage(). Only set non-zero here if a change is required. 
+func (cfg *Server) SetNrHugepages(log logging.Logger, mi *common.MemInfo) error { + minHugepages, maxHugepages, err := cfg.getMinMaxNrHugepages(log, mi.HugepageSizeKiB) if err != nil { return err } - // If the config doesn't specify hugepages, use the minimum. Otherwise, validate - // that the configured amount is sufficient. + // Allow emulated NVMe configurations either with or without hugepages enabled. + + if cfg.DisableHugepages { + if cfg.NrHugepages != 0 { + return FaultConfigHugepagesDisabledWithNrSet + } + if cfg.GetBdevConfigs().HaveRealNVMe() { + return FaultConfigHugepagesDisabledWithBdevs + } + if minHugepages != 0 { + log.Noticef("hugepages disabled but targets will be assigned to bdevs, " + + "this is an atypical situation and caution is advised") + } + + // Hugepages disabled and so zero requested in config. + return nil + } else if minHugepages == 0 { + // Enable minimum needed for scanning NVMe on host in discovery mode. + if cfg.NrHugepages < scanMinHugepageCount && mi.HugepagesTotal < scanMinHugepageCount { + cfg.NrHugepages = scanMinHugepageCount + } + + // Zero tgts on bdevs and min allocation for discovery mode has either been met or + // is being applied. + return nil + } + + log.Infof("%d total hugepages currently allocated on host", mi.HugepagesTotal) + + // If the config doesn't specify nr_hugepages, allocate a large initial value to reduce the + // chance of subsequent allocations on server start causing fragmentation. If the number is + // manually set in the config, notify if the configured amount is insufficient for number of + // targets. If the number of total hugepages in the system is insufficient for the number of + // configured targets then request a larger allocation by increasing cfg.NrHugepages. 
+ if cfg.NrHugepages == 0 { - var msgSysXS string - if sysXSCount > 0 { - msgSysXS = fmt.Sprintf(" and %d sys-xstreams", sysXSCount) + if mi.HugepagesTotal < minHugepages { + if minHugepages == maxHugepages { + log.Debugf("allocating calculated nr_hugepages %d", maxHugepages) + } else { + log.Debugf("allocating large nr_hugepages %d", maxHugepages) + } + cfg.NrHugepages = maxHugepages + } + if cfg.NrHugepages != 0 { + log.Infof("nr_hugepages requested auto-set to %d (%s)", cfg.NrHugepages, + humanize.IBytes(hugePageBytes(cfg.NrHugepages, mi.HugepageSizeKiB))) + } + } else { + log.Infof("nr_hugepages requested manually-set to %d (%s)", cfg.NrHugepages, + humanize.IBytes(hugePageBytes(cfg.NrHugepages, mi.HugepageSizeKiB))) + if cfg.NrHugepages < minHugepages { + log.Noticef("configured nr_hugepages %d is less than recommended %d, "+ + "if this is not intentional update the 'nr_hugepages' config "+ + "parameter or remove and it will be automatically calculated", + cfg.NrHugepages, minHugepages) } - log.Debugf("calculated nr_hugepages: %d for %d targets%s", minHugepages, - cfgTargetCount, msgSysXS) - cfg.NrHugepages = minHugepages - log.Infof("hugepage count automatically set to %d (%s)", minHugepages, - humanize.IBytes(hugePageBytes(minHugepages, mi.HugepageSizeKiB))) - } else if cfg.NrHugepages < minHugepages { - log.Noticef("configured nr_hugepages %d is less than recommended %d, "+ - "if this is not intentional update the 'nr_hugepages' config "+ - "parameter or remove and it will be automatically calculated", - cfg.NrHugepages, minHugepages) } return nil @@ -929,3 +1011,16 @@ func (cfg *Server) SetEngineAffinities(log logging.Logger, affSources ...EngineA return nil } + +// GetBdevConfigs retrieves all engine bdev storage tier configs from a server configuration. 
+func (cfg *Server) GetBdevConfigs() (bdevCfgs storage.TierConfigs) { + if cfg == nil { + return + } + + for _, engineCfg := range cfg.Engines { + bdevCfgs = append(bdevCfgs, engineCfg.Storage.Tiers.BdevConfigs()...) + } + + return +} diff --git a/src/control/server/config/server_test.go b/src/control/server/config/server_test.go index 475d99354cb..24fabd72192 100644 --- a/src/control/server/config/server_test.go +++ b/src/control/server/config/server_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2020-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -978,7 +979,7 @@ func TestServerConfig_Validation(t *testing.T) { } } -func TestServerConfig_SetNrHugepages(t *testing.T) { +func TestServerConfig_getMinMaxNrHugepages(t *testing.T) { testDir, cleanup := test.CreateTestDir(t) defer cleanup() @@ -989,59 +990,12 @@ func TestServerConfig_SetNrHugepages(t *testing.T) { defHpSizeKb := 2048 for name, tc := range map[string]struct { - extraConfig func(c *Server) *Server - zeroHpSize bool - expNrHugepages int - expErr error + extraConfig func(c *Server) *Server + zeroHpSize bool + expMinHugepages int + expMaxHugepages int + expErr error }{ - "disabled hugepages; bdevs configured": { - extraConfig: func(c *Server) *Server { - return c.WithDisableHugepages(true). - WithEngines(defaultEngineCfg(). - WithStorage( - storage.NewTierConfig(). - WithStorageClass("ram"). - WithScmMountPoint("/foo"), - storage.NewTierConfig(). - WithStorageClass("nvme"). - WithBdevDeviceList("0000:81:00.0"), - ), - ) - }, - expErr: FaultConfigHugepagesDisabledWithBdevs, - }, - "disabled hugepages; emulated bdevs configured": { - extraConfig: func(c *Server) *Server { - return c.WithDisableHugepages(true). - WithEngines(defaultEngineCfg(). - WithStorage( - storage.NewTierConfig(). - WithStorageClass("ram"). - // 80gib total - (8gib huge + 6gib sys + - // 1gib engine) - WithScmRamdiskSize(65). 
- WithScmMountPoint("/foo"), - storage.NewTierConfig(). - WithStorageClass("file"). - WithBdevDeviceList("/tmp/daos-bdev"). - WithBdevFileSize(16), - ), - ) - }, - expErr: FaultConfigHugepagesDisabledWithBdevs, - }, - "disabled hugepages; no bdevs configured": { - extraConfig: func(c *Server) *Server { - return c.WithDisableHugepages(true). - WithEngines(defaultEngineCfg(). - WithStorage( - storage.NewTierConfig(). - WithStorageClass("ram"). - WithScmMountPoint("/foo"), - ), - ) - }, - }, "zero hugepage size": { extraConfig: func(c *Server) *Server { return c @@ -1049,7 +1003,7 @@ func TestServerConfig_SetNrHugepages(t *testing.T) { zeroHpSize: true, expErr: errors.New("invalid system hugepage size"), }, - "zero hugepages set in config; bdevs configured; single target count": { + "unset in cfg; bdevs configured; single target count": { extraConfig: func(c *Server) *Server { return c.WithEngines(defaultEngineCfg(). WithTargetCount(1). @@ -1073,9 +1027,10 @@ func TestServerConfig_SetNrHugepages(t *testing.T) { ), ) }, - expNrHugepages: 2048, + expMinHugepages: 2048, + expMaxHugepages: 16384, }, - "zero hugepages set in config; bdevs configured; single target count; md-on-ssd": { + "unset in cfg; bdevs configured; single target count; md-on-ssd": { extraConfig: func(c *Server) *Server { return c.WithEngines(defaultEngineCfg(). WithTargetCount(1). @@ -1101,9 +1056,10 @@ func TestServerConfig_SetNrHugepages(t *testing.T) { ), ) }, - expNrHugepages: 2048, + expMinHugepages: 2048, + expMaxHugepages: 17408, }, - "zero hugepages set in config; bdevs configured": { + "unset in cfg; bdevs configured": { extraConfig: func(c *Server) *Server { return c.WithEngines(defaultEngineCfg(). 
WithStorage( @@ -1116,9 +1072,27 @@ func TestServerConfig_SetNrHugepages(t *testing.T) { ), ) }, - expNrHugepages: 4096, + expMinHugepages: 4096, + expMaxHugepages: 16384, }, - "zero hugepages set in config; emulated bdevs configured": { + "unset in cfg; bdevs configured; target count exceeds max": { + extraConfig: func(c *Server) *Server { + return c.WithEngines(defaultEngineCfg(). + WithTargetCount(33). + WithStorage( + storage.NewTierConfig(). + WithStorageClass("ram"). + WithScmMountPoint("/foo"), + storage.NewTierConfig(). + WithStorageClass("nvme"). + WithBdevDeviceList("0000:81:00.0"), + ), + ) + }, + expMinHugepages: 16896, + expMaxHugepages: 16896, + }, + "unset in cfg; emulated bdevs configured": { extraConfig: func(c *Server) *Server { return c.WithEngines(defaultEngineCfg(). WithStorage( @@ -1132,9 +1106,10 @@ func TestServerConfig_SetNrHugepages(t *testing.T) { ), ) }, - expNrHugepages: 4096, + expMinHugepages: 4096, + expMaxHugepages: 16384, }, - "zero hugepages set in config; no bdevs configured": { + "unset in cfg; no bdevs configured": { extraConfig: func(c *Server) *Server { return c.WithEngines(defaultEngineCfg(). 
WithStorage( @@ -1163,7 +1138,8 @@ func TestServerConfig_SetNrHugepages(t *testing.T) { ) }, // 512 pages * (8 targets + 1 sys-xstream for MD-on-SSD) - expNrHugepages: 4608, + expMinHugepages: 4608, + expMaxHugepages: 17408, }, } { t.Run(name, func(t *testing.T) { @@ -1173,11 +1149,227 @@ func TestServerConfig_SetNrHugepages(t *testing.T) { // Apply test case changes to basic config cfg := tc.extraConfig(baseCfg(t, log, testFile)) - mi := &common.MemInfo{ - HugepageSizeKiB: defHpSizeKb, - } + hugepageSizeKiB := defHpSizeKb if tc.zeroHpSize { - mi.HugepageSizeKiB = 0 + hugepageSizeKiB = 0 + } + + minHugepages, maxHugepages, err := cfg.getMinMaxNrHugepages(log, hugepageSizeKiB) + test.CmpErr(t, tc.expErr, err) + if tc.expErr != nil { + return + } + + test.AssertEqual(t, tc.expMinHugepages, minHugepages, + "unexpected number of minimum hugepages calculated from config") + test.AssertEqual(t, tc.expMaxHugepages, maxHugepages, + "unexpected number of maximum hugepages calculated from config") + }) + } +} + +func TestServerConfig_SetNrHugepages(t *testing.T) { + testDir, cleanup := test.CreateTestDir(t) + defer cleanup() + + // First, load a config based on the server config with all options uncommented. + testFile := filepath.Join(testDir, sConfigUncomment) + uncommentServerConfig(t, testFile) + + for name, tc := range map[string]struct { + extraConfig func(c *Server) *Server + hugepagesTotal int + expCfgNrHugepages int + expErr error + }{ + "disabled hugepages; nr_hugepages requested": { + extraConfig: func(c *Server) *Server { + return c.WithDisableHugepages(true). + WithNrHugepages(16896) + }, + expErr: FaultConfigHugepagesDisabledWithNrSet, + }, + "disabled hugepages; bdevs configured": { + extraConfig: func(c *Server) *Server { + return c.WithDisableHugepages(true). + WithEngines(defaultEngineCfg(). + WithStorage( + storage.NewTierConfig(). + WithStorageClass("ram"). + WithScmMountPoint("/foo"), + storage.NewTierConfig(). + WithStorageClass("nvme"). 
+ WithBdevDeviceList("0000:81:00.0"), + ), + ) + }, + expErr: FaultConfigHugepagesDisabledWithBdevs, + }, + "disabled hugepages; emulated bdevs configured": { + extraConfig: func(c *Server) *Server { + return c.WithDisableHugepages(true). + WithEngines(defaultEngineCfg(). + WithStorage( + storage.NewTierConfig(). + WithStorageClass("ram"). + // 80gib total - (8gib huge + 6gib sys + + // 1gib engine) + WithScmRamdiskSize(65). + WithScmMountPoint("/foo"), + storage.NewTierConfig(). + WithStorageClass("file"). + WithBdevDeviceList("/tmp/daos-bdev"). + WithBdevFileSize(16), + ), + ) + }, + }, + "disabled hugepages; no bdevs configured": { + extraConfig: func(c *Server) *Server { + return c.WithDisableHugepages(true). + WithEngines(defaultEngineCfg(). + WithStorage( + storage.NewTierConfig(). + WithStorageClass("ram"). + WithScmMountPoint("/foo"), + ), + ) + }, + }, + "unset in config; no bdevs configured": { + extraConfig: func(c *Server) *Server { + return c.WithEngines(defaultEngineCfg(). + WithStorage( + storage.NewTierConfig(). + WithStorageClass("ram"). + WithScmMountPoint("/foo"), + ), + ) + }, + expCfgNrHugepages: scanMinHugepageCount, + }, + "insufficient hugepages set in config; no bdevs configured": { + extraConfig: func(c *Server) *Server { + return c.WithEngines(defaultEngineCfg(). + WithStorage( + storage.NewTierConfig(). + WithStorageClass("ram"). + WithScmMountPoint("/foo"), + ), + ).WithNrHugepages(scanMinHugepageCount - 1) + }, + expCfgNrHugepages: scanMinHugepageCount, + }, + "sufficient hugepages set in config; no bdevs configured": { + extraConfig: func(c *Server) *Server { + return c.WithEngines(defaultEngineCfg(). + WithStorage( + storage.NewTierConfig(). + WithStorageClass("ram"). 
+ WithScmMountPoint("/foo"), + ), + ).WithNrHugepages(scanMinHugepageCount + 1) + }, + expCfgNrHugepages: scanMinHugepageCount + 1, + }, + "unset in cfg; insufficient total system hugepages; no bdevs configured": { + extraConfig: func(c *Server) *Server { + return c.WithEngines(defaultEngineCfg(). + WithStorage( + storage.NewTierConfig(). + WithStorageClass("ram"). + WithScmMountPoint("/foo"), + ), + ) + }, + hugepagesTotal: scanMinHugepageCount - 1, + expCfgNrHugepages: scanMinHugepageCount, + }, + "unset in cfg; sufficient total system hugepages; no bdevs configured": { + extraConfig: func(c *Server) *Server { + return c.WithEngines(defaultEngineCfg(). + WithStorage( + storage.NewTierConfig(). + WithStorageClass("ram"). + WithScmMountPoint("/foo"), + ), + ) + }, + hugepagesTotal: scanMinHugepageCount + 1, + }, + "md-on-ssd enabled with explicit role assignment; zero total system hugepages": { + extraConfig: func(c *Server) *Server { + return c.WithEngines( + defaultEngineCfg(). + WithFabricInterfacePort(1234). + WithStorage( + storage.NewTierConfig(). + WithScmMountPoint("/mnt/daos/1"). + WithStorageClass("ram"). + WithScmDisableHugepages(), + storage.NewTierConfig(). + WithStorageClass("nvme"). + WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). + WithBdevDeviceRoles(storage.BdevRoleAll), + ), + ) + }, + // Initial maximum set, disregarding minimum calculated. + // Min: 512 pages * (8 targets + 1 sys-xstream for MD-on-SSD). Max: 512 + // pages * (32 targets + 2 sys-xstreams). + expCfgNrHugepages: 17408, + }, + "md-on-ssd enabled with explicit role assignment; sufficient total system hugepages": { + extraConfig: func(c *Server) *Server { + return c.WithEngines( + defaultEngineCfg(). + WithFabricInterfacePort(1234). + WithStorage( + storage.NewTierConfig(). + WithScmMountPoint("/mnt/daos/1"). + WithStorageClass("ram"). + WithScmDisableHugepages(), + storage.NewTierConfig(). + WithStorageClass("nvme"). + WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). 
+ WithBdevDeviceRoles(storage.BdevRoleAll), + ), + ) + }, + // Total system hugepages meets minimum requirement, no change required. + // Min: 512 pages * (8 targets + 1 sys-xstream for MD-on-SSD). + hugepagesTotal: 4608, + }, + "md-on-ssd enabled with explicit role assignment; manual nr_hugepages in cfg": { + extraConfig: func(c *Server) *Server { + return c.WithNrHugepages(4000). + WithEngines(defaultEngineCfg(). + WithFabricInterfacePort(1234). + WithStorage( + storage.NewTierConfig(). + WithScmMountPoint("/mnt/daos/1"). + WithStorageClass("ram"). + WithScmDisableHugepages(), + storage.NewTierConfig(). + WithStorageClass("nvme"). + WithBdevDeviceList("0000:81:00.0", "0000:82:00.0"). + WithBdevDeviceRoles(storage.BdevRoleAll), + ), + ) + }, + expCfgNrHugepages: 4000, + }, + } { + t.Run(name, func(t *testing.T) { + log, buf := logging.NewTestLogger(t.Name()) + defer test.ShowBufferOnFailure(t, buf) + + // Apply test case changes to basic config + cfg := tc.extraConfig(baseCfg(t, log, testFile)) + + mi := &common.MemInfo{ + HugepagesTotal: tc.hugepagesTotal, + HugepageSizeKiB: 2048, } test.CmpErr(t, tc.expErr, cfg.SetNrHugepages(log, mi)) @@ -1185,7 +1377,7 @@ func TestServerConfig_SetNrHugepages(t *testing.T) { return } - test.AssertEqual(t, tc.expNrHugepages, cfg.NrHugepages, + test.AssertEqual(t, tc.expCfgNrHugepages, cfg.NrHugepages, "unexpected number of hugepages set in config") }) } diff --git a/src/control/server/ctl_storage_rpc.go b/src/control/server/ctl_storage_rpc.go index 832dee3eab7..f9f4c6cdc07 100644 --- a/src/control/server/ctl_storage_rpc.go +++ b/src/control/server/ctl_storage_rpc.go @@ -1,5 +1,6 @@ // // (C) Copyright 2019-2024 Intel Corporation. 
+// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -250,7 +251,7 @@ func bdevScan(ctx context.Context, cs *ControlService, req *ctlpb.ScanNvmeReq, n } }() - bdevCfgs := getBdevCfgsFromSrvCfg(cs.srvCfg) + bdevCfgs := cs.srvCfg.GetBdevConfigs() nrCfgBdevs := bdevCfgs.Bdevs().Len() if nrCfgBdevs == 0 { diff --git a/src/control/server/server_utils.go b/src/control/server/server_utils.go index 1e8c9e0c14d..4fd552b540b 100644 --- a/src/control/server/server_utils.go +++ b/src/control/server/server_utils.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. +// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -46,10 +47,6 @@ const ( // available memory in order to start the engines. memCheckThreshold = 90 - // scanMinHugepageCount is the minimum number of hugepages to allocate in order to satisfy - // SPDK memory requirements when performing a NVMe device scan. - scanMinHugepageCount = 128 - // maxLineChars is the maximum number of chars per line in a formatted byte string. maxLineChars = 32 ) @@ -101,15 +98,6 @@ func resolveFirstAddr(addr string, lookup ipLookupFn) (*net.TCPAddr, error) { return &net.TCPAddr{IP: addrs[0], Port: iPort}, nil } -func getBdevCfgsFromSrvCfg(cfg *config.Server) storage.TierConfigs { - var bdevCfgs storage.TierConfigs - for _, engineCfg := range cfg.Engines { - bdevCfgs = append(bdevCfgs, engineCfg.Storage.Tiers.BdevConfigs()...) 
- } - - return bdevCfgs -} - func cfgGetReplicas(cfg *config.Server, lookup ipLookupFn) ([]*net.TCPAddr, error) { var dbReplicas []*net.TCPAddr for _, rep := range cfg.MgmtSvcReplicas { @@ -308,12 +296,15 @@ func getEngineNUMANodes(log logging.Logger, engineCfgs []*engine.Config) ([]stri func prepBdevStorage(srv *server, iommuEnabled bool) error { defer srv.logDuration(track("time to prepare bdev storage")) + if srv.cfg == nil { + return errors.New("nil server config") + } if srv.cfg.DisableHugepages { - srv.log.Debugf("skip nvme prepare as disable_hugepages: true in config") + srv.log.Debugf("skip nvme prepare as disable_hugepages is set true in config") return nil } - bdevCfgs := getBdevCfgsFromSrvCfg(srv.cfg) + bdevCfgs := srv.cfg.GetBdevConfigs() // Perform these checks only if non-emulated NVMe is used and user is unprivileged. if bdevCfgs.HaveRealNVMe() && srv.runningUser.Username != "root" { @@ -354,42 +345,40 @@ func prepBdevStorage(srv *server, iommuEnabled bool) error { } if bdevCfgs.HaveBdevs() { - // The NrHugepages config value is a total for all engines. Distribute allocation - // of hugepages across each engine's numa node (as validation ensures that - // TargetsCount is equal for each engine). Assumes an equal number of engine's per - // numa node. - numaNodes, err := getEngineNUMANodes(srv.log, srv.cfg.Engines) - if err != nil { - return err - } + if srv.cfg.NrHugepages > 0 { + // The NrHugepages config value is a total for all engines. Distribute + // allocation of hugepages across each engine's numa node (as validation + // ensures that TargetsCount is equal for each engine). Assumes an equal + // number of engines per numa node. 
+ numaNodes, err := getEngineNUMANodes(srv.log, srv.cfg.Engines) + if err != nil { + return err + } - if len(numaNodes) == 0 { - return errors.New("invalid number of numa nodes detected (0)") - } + if len(numaNodes) == 0 { + return errors.New("invalid number of numa nodes detected (0)") + } - // Request a few more hugepages than actually required for each NUMA node - // allocation as some overhead may result in one or two being unavailable. - prepReq.HugepageCount = srv.cfg.NrHugepages / len(numaNodes) + // Request a few more hugepages than actually required for each NUMA node + // allocation as some overhead may result in one or two being unavailable. + prepReq.HugepageCount = srv.cfg.NrHugepages / len(numaNodes) - // Extra pages to be allocated per engine but take into account the page count - // will be issued on each NUMA node. - extraPages := (extraHugepages * len(srv.cfg.Engines)) / len(numaNodes) - prepReq.HugepageCount += extraPages - prepReq.HugeNodes = strings.Join(numaNodes, ",") + // Extra pages to be allocated per engine but take into account the page + // count will be issued on each NUMA node. + extraPages := (extraHugepages * len(srv.cfg.Engines)) / len(numaNodes) + prepReq.HugepageCount += extraPages + prepReq.HugeNodes = strings.Join(numaNodes, ",") - srv.log.Debugf("allocating %d hugepages on each of these numa nodes: %v", - prepReq.HugepageCount, numaNodes) - } else { - if srv.cfg.NrHugepages == 0 { - // If nr_hugepages is unset then set minimum needed for scanning in prepare - // request. - prepReq.HugepageCount = scanMinHugepageCount + srv.log.Debugf("allocating %d hugepages on each of these numa nodes: %v", + prepReq.HugepageCount, numaNodes) } else { - // If nr_hugepages has been set manually but no bdevs in config then - // allocate on numa node 0 (for example if a bigger number of hugepages are - // required in discovery mode for an unusually large number of SSDs). 
- prepReq.HugepageCount = srv.cfg.NrHugepages + srv.log.Debugf("skip allocating hugepages, no change is required") } + } else { + // If nr_hugepages has been set manually but no bdevs in config then allocate on + // numa node 0 (for example if a bigger number of hugepages are required in + // discovery mode for an unusually large number of SSDs). + prepReq.HugepageCount = srv.cfg.NrHugepages srv.log.Debugf("allocating %d hugepages on numa node 0", prepReq.HugepageCount) } @@ -436,8 +425,20 @@ func updateHugeMemValues(srv *server, ei *EngineInstance, mi *common.MemInfo) er } ei.RUnlock() + cfgNrTgts, nrSysXS := srv.cfg.GetTgtCounts(srv.log) + + minHugepages := 0 + if cfgNrTgts != 0 { + // Calculate minimum number of hugepages for all configured engines. + mhps, err := storage.CalcMinHugepages(mi.HugepageSizeKiB, cfgNrTgts+nrSysXS) + if err != nil { + return err + } + minHugepages = mhps + } + // Calculate mem_size per I/O engine (in MB) from number of hugepages required per engine. - nrPagesRequired := srv.cfg.NrHugepages / len(srv.cfg.Engines) + nrPagesRequired := minHugepages / len(srv.cfg.Engines) pageSizeMiB := mi.HugepageSizeKiB / humanize.KiByte // kib to mib memSizeReqMiB := nrPagesRequired * pageSizeMiB memSizeFreeMiB := mi.HugepagesFree * pageSizeMiB @@ -566,6 +567,12 @@ func registerEngineEventCallbacks(srv *server, engine *EngineInstance, allStarte if engine.storage.BdevRoleMetaConfigured() { return engine.storage.UnmountTmpfs() } + if !srv.cfg.DisableHugepages { + // Attempt to remove unused hugepages, log error only. + if err := cleanEngineHugepages(srv); err != nil { + srv.log.Errorf("%s", err) + } + } return nil }) diff --git a/src/control/server/server_utils_test.go b/src/control/server/server_utils_test.go index 66b2a4595cc..ad846b26a78 100644 --- a/src/control/server/server_utils_test.go +++ b/src/control/server/server_utils_test.go @@ -1,5 +1,6 @@ // // (C) Copyright 2021-2024 Intel Corporation. 
+// (C) Copyright 2025 Hewlett Packard Enterprise Development LP // // SPDX-License-Identifier: BSD-2-Clause-Patent // @@ -322,9 +323,9 @@ func TestServer_prepBdevStorage(t *testing.T) { WithEngines(pmemEngine(0)) }, overrideUser: "root", - hugepagesFree: 8192, + hugepagesFree: 16386, expPrepCall: &storage.BdevPrepareRequest{ - HugepageCount: 8194, + HugepageCount: 16386, HugeNodes: "0", TargetUser: "root", DisableVFIO: true, @@ -338,9 +339,9 @@ func TestServer_prepBdevStorage(t *testing.T) { return sc.WithDisableVFIO(true). WithEngines(pmemFakeNvmeEngine(0)) }, - hugepagesFree: 8192, + hugepagesFree: 16386, expPrepCall: &storage.BdevPrepareRequest{ - HugepageCount: 8194, + HugepageCount: 16386, HugeNodes: "0", TargetUser: username, DisableVFIO: true, @@ -361,9 +362,9 @@ func TestServer_prepBdevStorage(t *testing.T) { return sc.WithEngines(pmemEngine(0)) }, overrideUser: "root", - hugepagesFree: 8192, + hugepagesFree: 16386, expPrepCall: &storage.BdevPrepareRequest{ - HugepageCount: 8194, + HugepageCount: 16386, HugeNodes: "0", TargetUser: "root", PCIAllowList: test.MockPCIAddr(0), @@ -376,9 +377,9 @@ func TestServer_prepBdevStorage(t *testing.T) { srvCfgExtra: func(sc *config.Server) *config.Server { return sc.WithEngines(pmemFakeNvmeEngine(0)) }, - hugepagesFree: 8192, + hugepagesFree: 16386, expPrepCall: &storage.BdevPrepareRequest{ - HugepageCount: 8194, + HugepageCount: 16386, HugeNodes: "0", TargetUser: username, }, @@ -397,7 +398,7 @@ func TestServer_prepBdevStorage(t *testing.T) { WithEngines(pmemOnlyEngine(0), pmemOnlyEngine(1)) }, expPrepCall: &storage.BdevPrepareRequest{ - HugepageCount: scanMinHugepageCount, + HugepageCount: 128, TargetUser: username, EnableVMD: true, },