Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-16979 control: Reduce frequency of hugepage allocation at runtime #15848

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/control/fault/code/codes.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//
// (C) Copyright 2018-2024 Intel Corporation.
// (C) Copyright 2025 Hewlett Packard Enterprise Development LP
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//
Expand Down Expand Up @@ -195,6 +196,7 @@ const (
ServerConfigScmDiffClass
ServerConfigEngineBdevRolesMismatch
ServerConfigSysRsvdZero
ServerConfigHugepagesDisabledWithNrSet
)

// SPDK library bindings codes
Expand Down
6 changes: 6 additions & 0 deletions src/control/server/config/faults.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//
// (C) Copyright 2020-2024 Intel Corporation.
// (C) Copyright 2025 Hewlett Packard Enterprise Development LP
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//
Expand Down Expand Up @@ -91,6 +92,11 @@ var (
"hugepages cannot be disabled if bdevs have been specified in config",
"either set false (or remove) disable_hugepages parameter or remove nvme storage assignment in config and restart the control server",
)
FaultConfigHugepagesDisabledWithNrSet = serverConfigFault(
code.ServerConfigHugepagesDisabledWithNrSet,
"hugepages cannot be disabled if non-zero number has been specified in config",
"either set false (or remove) disable_hugepages parameter or remove nr_hugepages assignment in config and restart the control server",
)
FaultConfigControlMetadataNoPath = serverConfigFault(
code.ServerConfigControlMetadataNoPath,
"using a control_metadata device requires a path to use as the mount point",
Expand Down
147 changes: 121 additions & 26 deletions src/control/server/config/server.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//
// (C) Copyright 2020-2024 Intel Corporation.
// (C) Copyright 2025 Hewlett Packard Enterprise Development LP
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//
Expand Down Expand Up @@ -33,6 +34,14 @@ const (
defaultConfigPath = "../etc/daos_server.yml"
ConfigOut = ".daos_server.active.yml"
relConfExamplesPath = "../utils/config/examples/"

// scanMinHugepageCount is the minimum number of hugepages to allocate in order to satisfy
// SPDK memory requirements when performing an NVMe device scan.
scanMinHugepageCount = 128
// largeTargetCount is a large number of targets intended to satisfy most typical hugemem
// requirements based on a usual number of targets (16 per-engine on dual engine host).
largeTargetCount = 16 * 2
largeSysXSCount = 2
)

// SupportConfig is defined here to avoid a import cycle
Expand Down Expand Up @@ -478,15 +487,13 @@ func hugePageBytes(hpNr, hpSz int) uint64 {
return uint64(hpNr*hpSz) * humanize.KiByte
}

// SetNrHugepages calculates minimum based on total target count if using nvme.
func (cfg *Server) SetNrHugepages(log logging.Logger, mi *common.MemInfo) error {
var cfgTargetCount int
var sysXSCount int
// GetTgtCounts returns target count totals for a server config file.
func (cfg *Server) GetTgtCounts(log logging.Logger) (cfgTargetCount, sysXSCount int) {
for idx, ec := range cfg.Engines {
msg := fmt.Sprintf("engine %d fabric numa %d, storage numa %d", idx,
ec.Fabric.NumaNodeIndex, ec.Storage.NumaNodeIndex)

// Calculate overall target count if NVMe is enabled.
// Calculate overall target count if bdevs exist in config.
if ec.Storage.Tiers.HaveBdevs() {
cfgTargetCount += ec.TargetCount
if ec.Storage.Tiers.HasBdevRoleMeta() {
Expand All @@ -502,37 +509,112 @@ func (cfg *Server) SetNrHugepages(log logging.Logger, mi *common.MemInfo) error
log.Debug(msg)
}

if cfgTargetCount <= 0 {
return nil // no nvme, no hugepages required
}
return
}

if cfg.DisableHugepages {
return FaultConfigHugepagesDisabledWithBdevs
func (cfg *Server) getMinMaxNrHugepages(log logging.Logger, hpSizeKiB int) (int, int, error) {
cfgTargetCount, sysXSCount := cfg.GetTgtCounts(log)

if cfgTargetCount == 0 {
return 0, 0, nil
}

// Calculate minimum number of hugepages for all configured engines.
minHugepages, err := storage.CalcMinHugepages(mi.HugepageSizeKiB, cfgTargetCount+sysXSCount)
minHugepages, err := storage.CalcMinHugepages(hpSizeKiB, cfgTargetCount+sysXSCount)
if err != nil {
return 0, 0, err
}

maxTgtCount := largeTargetCount
if sysXSCount > 0 {
maxTgtCount += largeSysXSCount
}
maxHugepages, err := storage.CalcMinHugepages(hpSizeKiB, maxTgtCount)
if err != nil {
return 0, 0, err
}

var msgSysXS string
if sysXSCount > 0 {
msgSysXS = fmt.Sprintf(" and %d/%d sys-xstreams", sysXSCount, largeSysXSCount)
}
log.Tracef("calculated min/max %d/%d nr_hugepages based on %d/%d targets%s",
minHugepages, maxHugepages, cfgTargetCount, largeTargetCount, msgSysXS)

if minHugepages > maxHugepages {
log.Debugf("config hugepage requirements exceed normal maximum")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be logged at NOTICE level? Who is it for?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just a debug message because the user doesn't need to do anything about it; it simply indicates that the configuration requires more hugepages than the normal maximum.

maxHugepages = minHugepages
}

return minHugepages, maxHugepages, nil
}

// SetNrHugepages calculates minimum based on total target count if using nvme. If cfg.NrHugepages
// is set to zero, no hugepage allocation requests will be made to the kernel during service
// start-up in prepBdevStorage(). Only set non-zero here if a change is required.
func (cfg *Server) SetNrHugepages(log logging.Logger, mi *common.MemInfo) error {
minHugepages, maxHugepages, err := cfg.getMinMaxNrHugepages(log, mi.HugepageSizeKiB)
if err != nil {
return err
}

// If the config doesn't specify hugepages, use the minimum. Otherwise, validate
// that the configured amount is sufficient.
// Allow emulated NVMe configurations either with or without hugepages enabled.

if cfg.DisableHugepages {
if cfg.NrHugepages != 0 {
return FaultConfigHugepagesDisabledWithNrSet
}
if cfg.GetBdevConfigs().HaveRealNVMe() {
return FaultConfigHugepagesDisabledWithBdevs
}
if minHugepages != 0 {
log.Noticef("hugepages disabled but targets will be assigned to bdevs, " +
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't disagree with doing something here, but logging "caution is advised" is not particularly helpful, IMO. Is it an error or not? What is the admin supposed to do if/when they happen to notice this message in the server log?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is to indicate that the server is operating in an unusual mode; the administrator should be aware of that.

"this is an atypical situation and caution is advised")
}

// Hugepages disabled and so zero requested in config.
return nil
} else if minHugepages == 0 {
// Enable minimum needed for scanning NVMe on host in discovery mode.
if cfg.NrHugepages < scanMinHugepageCount && mi.HugepagesTotal < scanMinHugepageCount {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am probably missing something, but I was thinking that mi.HugepagesTotal was the number of available hugepages, and thus that cfg.NrHugepages could not be greater than that value.

cfg.NrHugepages = scanMinHugepageCount
}

// Zero tgts on bdevs and min allocation for discovery mode has either been met or
// is being applied.
return nil
}

log.Infof("%d total hugepages currently allocated on host", mi.HugepagesTotal)

// If the config doesn't specify nr_hugepages, allocate a large initial value to reduce the
// chance of subsequent allocations on server start causing fragmentation. If the number is
// manually set in the config, notify if the configured amount is insufficient for number of
// targets. If the number of total hugepages in the system is insufficient for the number of
// configured targets then request a larger allocation by increasing cfg.NrHugepages.

if cfg.NrHugepages == 0 {
var msgSysXS string
if sysXSCount > 0 {
msgSysXS = fmt.Sprintf(" and %d sys-xstreams", sysXSCount)
if mi.HugepagesTotal < minHugepages {
if minHugepages == maxHugepages {
log.Debugf("allocating calculated nr_hugepages %d", maxHugepages)
} else {
log.Debugf("allocating large nr_hugepages %d", maxHugepages)
}
cfg.NrHugepages = maxHugepages
}
if cfg.NrHugepages != 0 {
log.Infof("nr_hugepages requested auto-set to %d (%s)", cfg.NrHugepages,
humanize.IBytes(hugePageBytes(cfg.NrHugepages, mi.HugepageSizeKiB)))
}
} else {
log.Infof("nr_hugepages requested manually-set to %d (%s)", cfg.NrHugepages,
humanize.IBytes(hugePageBytes(cfg.NrHugepages, mi.HugepageSizeKiB)))
if cfg.NrHugepages < minHugepages {
log.Noticef("configured nr_hugepages %d is less than recommended %d, "+
"if this is not intentional update the 'nr_hugepages' config "+
"parameter or remove and it will be automatically calculated",
cfg.NrHugepages, minHugepages)
}
log.Debugf("calculated nr_hugepages: %d for %d targets%s", minHugepages,
cfgTargetCount, msgSysXS)
cfg.NrHugepages = minHugepages
log.Infof("hugepage count automatically set to %d (%s)", minHugepages,
humanize.IBytes(hugePageBytes(minHugepages, mi.HugepageSizeKiB)))
} else if cfg.NrHugepages < minHugepages {
log.Noticef("configured nr_hugepages %d is less than recommended %d, "+
"if this is not intentional update the 'nr_hugepages' config "+
"parameter or remove and it will be automatically calculated",
cfg.NrHugepages, minHugepages)
}

return nil
Expand Down Expand Up @@ -929,3 +1011,16 @@ func (cfg *Server) SetEngineAffinities(log logging.Logger, affSources ...EngineA

return nil
}

// GetBdevConfigs gathers the bdev storage tier configs of every engine in the server
// configuration into a single list, in engine order. A nil receiver is tolerated and
// yields an empty result.
func (cfg *Server) GetBdevConfigs() (bdevCfgs storage.TierConfigs) {
	if cfg != nil {
		for i := range cfg.Engines {
			engineBdevs := cfg.Engines[i].Storage.Tiers.BdevConfigs()
			bdevCfgs = append(bdevCfgs, engineBdevs...)
		}
	}

	return bdevCfgs
}
Loading