Skip to content

Commit

Permalink
Merge pull request #786 from luomingmeng/dev/fix-sysadvisor-reclaim-state
Browse files Browse the repository at this point in the history

fix(*): fix sysadvisor state reclaim pod's TopologyAwareAssignments i…
  • Loading branch information
gary-lgy authored Feb 24, 2025
2 parents 80d36ed + e1f2148 commit 8168777
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,9 @@ func (p *DynamicPolicy) GetCheckpoint(_ context.Context,

chkEntries[uid].Entries[entryName].OwnerPoolName = ownerPoolName

// not set topology-aware assignments for shared_cores and reclaimed_cores,
// not set topology-aware assignments for shared_cores,
// since their topology-aware assignments are same to the pools they are in.
if (!allocationInfo.CheckShared() && !allocationInfo.CheckReclaimed()) || containerEntries.IsPoolEntry() {
if !allocationInfo.CheckShared() || containerEntries.IsPoolEntry() {
chkEntries[uid].Entries[entryName].TopologyAwareAssignments = machine.ParseCPUAssignmentFormat(allocationInfo.TopologyAwareAssignments)
chkEntries[uid].Entries[entryName].OriginalTopologyAwareAssignments = machine.ParseCPUAssignmentFormat(allocationInfo.OriginalTopologyAwareAssignments)
}
Expand Down Expand Up @@ -274,9 +274,9 @@ func (p *DynamicPolicy) createGetAdviceRequest() (*advisorapi.GetAdviceRequest,

info.AllocationInfo.OwnerPoolName = ownerPoolName

// not set topology-aware assignments for shared_cores and reclaimed_cores,
// not set topology-aware assignments for shared_cores,
// since their topology-aware assignments are same to the pools they are in.
if (!allocationInfo.CheckShared() && !allocationInfo.CheckReclaimed()) || containerEntries.IsPoolEntry() {
if !allocationInfo.CheckShared() || containerEntries.IsPoolEntry() {
info.AllocationInfo.TopologyAwareAssignments = machine.ParseCPUAssignmentFormat(allocationInfo.TopologyAwareAssignments)
info.AllocationInfo.OriginalTopologyAwareAssignments = machine.ParseCPUAssignmentFormat(allocationInfo.OriginalTopologyAwareAssignments)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,28 +151,33 @@ func (p *PolicyNUMAAware) Update() (err error) {
if reclaimableMemory > 0 {
reduceRatio = p.memoryHeadroom / reclaimableMemory
}
numaHeadroom := 0.0

totalNUMAHeadroom := 0.0
allNUMAs := p.metaServer.CPUDetails.NUMANodes().ToSliceInt()
numaHeadroom := make(map[int]resource.Quantity, len(allNUMAs))
for numaID := range numaReclaimableMemory {
numaReclaimableMemory[numaID] *= reduceRatio
numaHeadroom += numaReclaimableMemory[numaID]
p.numaMemoryHeadroom[numaID] = *resource.NewQuantity(int64(numaReclaimableMemory[numaID]), resource.BinarySI)
totalNUMAHeadroom += numaReclaimableMemory[numaID]
numaHeadroom[numaID] = *resource.NewQuantity(int64(numaReclaimableMemory[numaID]), resource.BinarySI)
general.InfoS("memory reclaimable per NUMA", "NUMA-ID", numaID, "headroom", numaReclaimableMemory[numaID])
}

allNUMAs := p.metaServer.CPUDetails.NUMANodes()
for _, numaID := range allNUMAs.ToSliceInt() {
if _, ok := p.numaMemoryHeadroom[numaID]; !ok {
for _, numaID := range allNUMAs {
if _, ok := numaHeadroom[numaID]; !ok {
general.InfoS("set non-reclaim NUMA memory reclaimable as empty", "NUMA-ID", numaID)
p.numaMemoryHeadroom[numaID] = *resource.NewQuantity(0, resource.BinarySI)
numaHeadroom[numaID] = *resource.NewQuantity(0, resource.BinarySI)
}
}

p.numaMemoryHeadroom = numaHeadroom

general.InfoS("total memory reclaimable",
"reclaimableMemory", general.FormatMemoryQuantity(reclaimableMemory),
"ResourceUpperBound", general.FormatMemoryQuantity(p.essentials.ResourceUpperBound),
"systemWatermarkReserved", general.FormatMemoryQuantity(systemWatermarkReserved),
"reservedForAllocate", general.FormatMemoryQuantity(reservedForAllocate),
"headroom", p.memoryHeadroom,
"totalNUMAHeadroom", totalNUMAHeadroom,
"numaHeadroom", numaHeadroom,
)
return nil
Expand Down
2 changes: 1 addition & 1 deletion pkg/agent/sysadvisor/plugin/qosaware/server/cpu_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,7 @@ func (cs *cpuServer) setContainerInfoBasedOnAllocationInfo(
}

// fill in topology aware assignment for containers with owner pool
if ci.QoSLevel != consts.PodAnnotationQoSLevelDedicatedCores {
if info.TopologyAwareAssignments == nil {
if len(ci.OwnerPoolName) > 0 {
if poolInfo, ok := cs.metaCache.GetPoolInfo(ci.OwnerPoolName); ok {
ci.TopologyAwareAssignments = poolInfo.TopologyAwareAssignments.Clone()
Expand Down

0 comments on commit 8168777

Please sign in to comment.