-
Notifications
You must be signed in to change notification settings - Fork 308
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DAOS-16979 control: Reduce frequency of hugepage allocation at runtime #15848
base: master
Are you sure you want to change the base?
Changes from all commits
b1e9861
1377615
2605af9
80d9874
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
// | ||
// (C) Copyright 2020-2024 Intel Corporation. | ||
// (C) Copyright 2025 Hewlett Packard Enterprise Development LP | ||
// | ||
// SPDX-License-Identifier: BSD-2-Clause-Patent | ||
// | ||
|
@@ -33,6 +34,14 @@ const ( | |
defaultConfigPath = "../etc/daos_server.yml" | ||
ConfigOut = ".daos_server.active.yml" | ||
relConfExamplesPath = "../utils/config/examples/" | ||
|
||
// scanMinHugepageCount is the minimum number of hugepages to allocate in order to satisfy | ||
// SPDK memory requirements when performing a NVMe device scan. | ||
scanMinHugepageCount = 128 | ||
// largeTargetCount is a large number of targets intended to satisfy most typical hugemem | ||
// requirements based on a usual number of targets (16 per-engine on dual engine host). | ||
largeTargetCount = 16 * 2 | ||
largeSysXSCount = 2 | ||
) | ||
|
||
// SupportConfig is defined here to avoid a import cycle | ||
|
@@ -478,15 +487,13 @@ func hugePageBytes(hpNr, hpSz int) uint64 { | |
return uint64(hpNr*hpSz) * humanize.KiByte | ||
} | ||
|
||
// SetNrHugepages calculates minimum based on total target count if using nvme. | ||
func (cfg *Server) SetNrHugepages(log logging.Logger, mi *common.MemInfo) error { | ||
var cfgTargetCount int | ||
var sysXSCount int | ||
// GetTgtCounts returns target count totals for a server config file. | ||
func (cfg *Server) GetTgtCounts(log logging.Logger) (cfgTargetCount, sysXSCount int) { | ||
for idx, ec := range cfg.Engines { | ||
msg := fmt.Sprintf("engine %d fabric numa %d, storage numa %d", idx, | ||
ec.Fabric.NumaNodeIndex, ec.Storage.NumaNodeIndex) | ||
|
||
// Calculate overall target count if NVMe is enabled. | ||
// Calculate overall target count if bdevs exist in config. | ||
if ec.Storage.Tiers.HaveBdevs() { | ||
cfgTargetCount += ec.TargetCount | ||
if ec.Storage.Tiers.HasBdevRoleMeta() { | ||
|
@@ -502,37 +509,112 @@ func (cfg *Server) SetNrHugepages(log logging.Logger, mi *common.MemInfo) error | |
log.Debug(msg) | ||
} | ||
|
||
if cfgTargetCount <= 0 { | ||
return nil // no nvme, no hugepages required | ||
} | ||
return | ||
} | ||
|
||
if cfg.DisableHugepages { | ||
return FaultConfigHugepagesDisabledWithBdevs | ||
func (cfg *Server) getMinMaxNrHugepages(log logging.Logger, hpSizeKiB int) (int, int, error) { | ||
cfgTargetCount, sysXSCount := cfg.GetTgtCounts(log) | ||
|
||
if cfgTargetCount == 0 { | ||
return 0, 0, nil | ||
} | ||
|
||
// Calculate minimum number of hugepages for all configured engines. | ||
minHugepages, err := storage.CalcMinHugepages(mi.HugepageSizeKiB, cfgTargetCount+sysXSCount) | ||
minHugepages, err := storage.CalcMinHugepages(hpSizeKiB, cfgTargetCount+sysXSCount) | ||
if err != nil { | ||
return 0, 0, err | ||
} | ||
|
||
maxTgtCount := largeTargetCount | ||
if sysXSCount > 0 { | ||
maxTgtCount += largeSysXSCount | ||
} | ||
maxHugepages, err := storage.CalcMinHugepages(hpSizeKiB, maxTgtCount) | ||
if err != nil { | ||
return 0, 0, err | ||
} | ||
|
||
var msgSysXS string | ||
if sysXSCount > 0 { | ||
msgSysXS = fmt.Sprintf(" and %d/%d sys-xstreams", sysXSCount, largeSysXSCount) | ||
} | ||
log.Tracef("calculated min/max %d/%d nr_hugepages based on %d/%d targets%s", | ||
minHugepages, maxHugepages, cfgTargetCount, largeTargetCount, msgSysXS) | ||
|
||
if minHugepages > maxHugepages { | ||
log.Debugf("config hugepage requirements exceed normal maximum") | ||
maxHugepages = minHugepages | ||
} | ||
|
||
return minHugepages, maxHugepages, nil | ||
} | ||
|
||
// SetNrHugepages calculates minimum based on total target count if using nvme. If cfg.NrHugepages | ||
// is set to zero, no hugepage allocation requests will be made to the kernel during service | ||
// start-up in prepBdevStorage(). Only set non-zero here if a change is required. | ||
func (cfg *Server) SetNrHugepages(log logging.Logger, mi *common.MemInfo) error { | ||
minHugepages, maxHugepages, err := cfg.getMinMaxNrHugepages(log, mi.HugepageSizeKiB) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
// If the config doesn't specify hugepages, use the minimum. Otherwise, validate | ||
// that the configured amount is sufficient. | ||
// Allow emulated NVMe configurations either with or without hugepages enabled. | ||
|
||
if cfg.DisableHugepages { | ||
if cfg.NrHugepages != 0 { | ||
return FaultConfigHugepagesDisabledWithNrSet | ||
} | ||
if cfg.GetBdevConfigs().HaveRealNVMe() { | ||
return FaultConfigHugepagesDisabledWithBdevs | ||
} | ||
if minHugepages != 0 { | ||
log.Noticef("hugepages disabled but targets will be assigned to bdevs, " + | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't disagree with doing something here, but logging "caution is advised" is not particularly helpful, IMO. Is it an error or not? What is the admin supposed to do if/when they happen to notice this message in the server log? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is to indicate that the server is operating in an unusual mode, the administrator should be aware of that |
||
"this is an atypical situation and caution is advised") | ||
} | ||
|
||
// Hugepages disabled and so zero requested in config. | ||
return nil | ||
} else if minHugepages == 0 { | ||
// Enable minimum needed for scanning NVMe on host in discovery mode. | ||
if cfg.NrHugepages < scanMinHugepageCount && mi.HugepagesTotal < scanMinHugepageCount { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am probably missing something but I was thinking that |
||
cfg.NrHugepages = scanMinHugepageCount | ||
} | ||
|
||
// Zero tgts on bdevs and min allocation for discovery mode has either been met or | ||
// is being applied. | ||
return nil | ||
} | ||
|
||
log.Infof("%d total hugepages currently allocated on host", mi.HugepagesTotal) | ||
|
||
// If the config doesn't specify nr_hugepages, allocate a large initial value to reduce the | ||
// chance of subsequent allocations on server start causing fragmentation. If the number is | ||
// manually set in the config, notify if the configured amount is insufficient for number of | ||
// targets. If the number of total hugepages in the system is insufficient for the number of | ||
// configured targets then request a larger allocation by increasing cfg.NrHugepages. | ||
|
||
if cfg.NrHugepages == 0 { | ||
var msgSysXS string | ||
if sysXSCount > 0 { | ||
msgSysXS = fmt.Sprintf(" and %d sys-xstreams", sysXSCount) | ||
if mi.HugepagesTotal < minHugepages { | ||
if minHugepages == maxHugepages { | ||
log.Debugf("allocating calculated nr_hugepages %d", maxHugepages) | ||
} else { | ||
log.Debugf("allocating large nr_hugepages %d", maxHugepages) | ||
} | ||
cfg.NrHugepages = maxHugepages | ||
} | ||
if cfg.NrHugepages != 0 { | ||
log.Infof("nr_hugepages requested auto-set to %d (%s)", cfg.NrHugepages, | ||
humanize.IBytes(hugePageBytes(cfg.NrHugepages, mi.HugepageSizeKiB))) | ||
} | ||
} else { | ||
log.Infof("nr_hugepages requested manually-set to %d (%s)", cfg.NrHugepages, | ||
humanize.IBytes(hugePageBytes(cfg.NrHugepages, mi.HugepageSizeKiB))) | ||
if cfg.NrHugepages < minHugepages { | ||
log.Noticef("configured nr_hugepages %d is less than recommended %d, "+ | ||
"if this is not intentional update the 'nr_hugepages' config "+ | ||
"parameter or remove and it will be automatically calculated", | ||
cfg.NrHugepages, minHugepages) | ||
} | ||
log.Debugf("calculated nr_hugepages: %d for %d targets%s", minHugepages, | ||
cfgTargetCount, msgSysXS) | ||
cfg.NrHugepages = minHugepages | ||
log.Infof("hugepage count automatically set to %d (%s)", minHugepages, | ||
humanize.IBytes(hugePageBytes(minHugepages, mi.HugepageSizeKiB))) | ||
} else if cfg.NrHugepages < minHugepages { | ||
log.Noticef("configured nr_hugepages %d is less than recommended %d, "+ | ||
"if this is not intentional update the 'nr_hugepages' config "+ | ||
"parameter or remove and it will be automatically calculated", | ||
cfg.NrHugepages, minHugepages) | ||
} | ||
|
||
return nil | ||
|
@@ -929,3 +1011,16 @@ func (cfg *Server) SetEngineAffinities(log logging.Logger, affSources ...EngineA | |
|
||
return nil | ||
} | ||
|
||
// GetBdevConfigs retrieves all engine bdev storage tier configs from a server configuration. | ||
func (cfg *Server) GetBdevConfigs() (bdevCfgs storage.TierConfigs) { | ||
if cfg == nil { | ||
return | ||
} | ||
|
||
for _, engineCfg := range cfg.Engines { | ||
bdevCfgs = append(bdevCfgs, engineCfg.Storage.Tiers.BdevConfigs()...) | ||
} | ||
|
||
return | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be logged at NOTICE level? Who is it for?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is just a debug message because the user doesn't need to do anything about it, it is an indication that the configuration requires more huge pages than the normal maximum