Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix incorrect CPU topology on single NUMA and multi socket platform. #2799

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 17 additions & 15 deletions utils/sysinfo/sysinfo.go
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ func getCoresInfo(sysFs sysfs.SysFs, cpuDirs []string) ([]info.Core, error) {
for _, cpuDir := range cpuDirs {
cpuID, err := getMatchedInt(cpuDirRegExp, cpuDir)
if err != nil {
return nil, fmt.Errorf("Unexpected format of CPU directory, cpuDirRegExp %s, cpuDir: %s", cpuDirRegExp, cpuDir)
return nil, fmt.Errorf("unexpected format of CPU directory, cpuDirRegExp %s, cpuDir: %s", cpuDirRegExp, cpuDir)
}
if !sysFs.IsCPUOnline(cpuDir) {
continue
Expand All @@ -401,9 +401,22 @@ func getCoresInfo(sysFs sysfs.SysFs, cpuDirs []string) ([]info.Core, error) {
return nil, err
}

rawPhysicalPackageID, err := sysFs.GetCPUPhysicalPackageID(cpuDir)
if os.IsNotExist(err) {
klog.Warningf("Cannot read physical package id for %s, physical_package_id file does not exist, err: %s", cpuDir, err)
continue
} else if err != nil {
return nil, err
}

physicalPackageID, err := strconv.Atoi(rawPhysicalPackageID)
if err != nil {
return nil, err
}

coreIDx := -1
for id, core := range cores {
if core.Id == physicalID {
if core.Id == physicalID && core.SocketID == physicalPackageID {
coreIDx = id
}
}
Expand All @@ -414,25 +427,14 @@ func getCoresInfo(sysFs sysfs.SysFs, cpuDirs []string) ([]info.Core, error) {
desiredCore := &cores[coreIDx]

desiredCore.Id = physicalID
desiredCore.SocketID = physicalPackageID

if len(desiredCore.Threads) == 0 {
desiredCore.Threads = []int{cpuID}
} else {
desiredCore.Threads = append(desiredCore.Threads, cpuID)
}

rawPhysicalPackageID, err := sysFs.GetCPUPhysicalPackageID(cpuDir)
if os.IsNotExist(err) {
klog.Warningf("Cannot read physical package id for %s, physical_package_id file does not exist, err: %s", cpuDir, err)
continue
} else if err != nil {
return nil, err
}

physicalPackageID, err := strconv.Atoi(rawPhysicalPackageID)
if err != nil {
return nil, err
}
desiredCore.SocketID = physicalPackageID
}
return cores, nil
}
Expand Down
245 changes: 176 additions & 69 deletions utils/sysinfo/sysinfo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,71 +104,62 @@ func TestGetHugePagesInfoWithWrongNrHugePageValue(t *testing.T) {
}

func TestGetNodesInfo(t *testing.T) {
fakeSys := &fakesysfs.FakeSysFs{}
c := sysfs.CacheInfo{
Size: 32 * 1024,
Type: "unified",
Level: 3,
Cpus: 2,
}
fakeSys.SetCacheInfo(c)

nodesPaths := []string{
"/fakeSysfs/devices/system/node/node0",
"/fakeSysfs/devices/system/node/node1",
}
fakeSys.SetNodesPaths(nodesPaths, nil)

cpusPaths := map[string][]string{
"/fakeSysfs/devices/system/node/node0": {
"/fakeSysfs/devices/system/node/node0/cpu0",
"/fakeSysfs/devices/system/node/node0/cpu1",
},
"/fakeSysfs/devices/system/node/node1": {
"/fakeSysfs/devices/system/node/node0/cpu2",
"/fakeSysfs/devices/system/node/node0/cpu3",
},
}
fakeSys.SetCPUsPaths(cpusPaths, nil)

coreThread := map[string]string{
"/fakeSysfs/devices/system/node/node0/cpu0": "0",
"/fakeSysfs/devices/system/node/node0/cpu1": "0",
"/fakeSysfs/devices/system/node/node0/cpu2": "1",
"/fakeSysfs/devices/system/node/node0/cpu3": "1",
}
fakeSys.SetCoreThreads(coreThread, nil)

memTotal := "MemTotal: 32817192 kB"
fakeSys.SetMemory(memTotal, nil)

hugePages := []os.FileInfo{
&fakesysfs.FileInfo{EntryName: "hugepages-2048kB"},
}
fakeSys.SetHugePages(hugePages, nil)

hugePageNr := map[string]string{
"/fakeSysfs/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages": "1",
"/fakeSysfs/devices/system/node/node1/hugepages/hugepages-2048kB/nr_hugepages": "1",
}
fakeSys.SetHugePagesNr(hugePageNr, nil)

physicalPackageIDs := map[string]string{
"/fakeSysfs/devices/system/node/node0/cpu0": "0",
"/fakeSysfs/devices/system/node/node0/cpu1": "0",
"/fakeSysfs/devices/system/node/node0/cpu2": "1",
"/fakeSysfs/devices/system/node/node0/cpu3": "1",
}
fakeSys.SetPhysicalPackageIDs(physicalPackageIDs, nil)

nodes, cores, err := GetNodesInfo(fakeSys)
assert.Nil(t, err)
assert.Equal(t, 2, len(nodes))
assert.Equal(t, 4, cores)

nodesJSON, err := json.Marshal(nodes)
assert.Nil(t, err)
expectedNodes := `
testCases := []struct {
cache sysfs.CacheInfo
nodesPaths []string
cpusPaths map[string][]string
coresThreads map[string]string
memTotal string
hugePages []os.FileInfo
hugePageNr map[string]string
physicalPackageIDs map[string]string
nodes int
cores int
expectedNodes string
}{
{
sysfs.CacheInfo{
Size: 32 * 1024,
Type: "unified",
Level: 3,
Cpus: 2,
},
[]string{
"/fakeSysfs/devices/system/node/node0",
"/fakeSysfs/devices/system/node/node1"},
map[string][]string{
"/fakeSysfs/devices/system/node/node0": {
"/fakeSysfs/devices/system/node/node0/cpu0",
"/fakeSysfs/devices/system/node/node0/cpu1",
},
"/fakeSysfs/devices/system/node/node1": {
"/fakeSysfs/devices/system/node/node0/cpu2",
"/fakeSysfs/devices/system/node/node0/cpu3",
},
},
map[string]string{
"/fakeSysfs/devices/system/node/node0/cpu0": "0",
"/fakeSysfs/devices/system/node/node0/cpu1": "0",
"/fakeSysfs/devices/system/node/node0/cpu2": "1",
"/fakeSysfs/devices/system/node/node0/cpu3": "1",
},
"MemTotal: 32817192 kB",
[]os.FileInfo{
&fakesysfs.FileInfo{EntryName: "hugepages-2048kB"},
},
map[string]string{
"/fakeSysfs/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages": "1",
"/fakeSysfs/devices/system/node/node1/hugepages/hugepages-2048kB/nr_hugepages": "1",
},
map[string]string{
"/fakeSysfs/devices/system/node/node0/cpu0": "0",
"/fakeSysfs/devices/system/node/node0/cpu1": "0",
"/fakeSysfs/devices/system/node/node0/cpu2": "1",
"/fakeSysfs/devices/system/node/node0/cpu3": "1",
},
2,
4,
`
[
{
"node_id": 0,
Expand Down Expand Up @@ -227,8 +218,124 @@ func TestGetNodesInfo(t *testing.T) {
]
}
]
`
assert.JSONEq(t, expectedNodes, string(nodesJSON))
`,
},
{
sysfs.CacheInfo{
Size: 32 * 1024,
Type: "unified",
Level: 3,
Cpus: 6,
},
[]string{
"/fakeSysfs/devices/system/node/node0"},
map[string][]string{
"/fakeSysfs/devices/system/node/node0": {
"/fakeSysfs/devices/system/node/node0/cpu0",
"/fakeSysfs/devices/system/node/node0/cpu1",
"/fakeSysfs/devices/system/node/node0/cpu2",
"/fakeSysfs/devices/system/node/node0/cpu3",
"/fakeSysfs/devices/system/node/node0/cpu4",
"/fakeSysfs/devices/system/node/node0/cpu5",
},
},
map[string]string{
"/fakeSysfs/devices/system/node/node0/cpu0": "0",
"/fakeSysfs/devices/system/node/node0/cpu1": "0",
"/fakeSysfs/devices/system/node/node0/cpu2": "1",
"/fakeSysfs/devices/system/node/node0/cpu3": "1",
"/fakeSysfs/devices/system/node/node0/cpu4": "2",
"/fakeSysfs/devices/system/node/node0/cpu5": "2",
},
"MemTotal: 32817192 kB",
[]os.FileInfo{
&fakesysfs.FileInfo{EntryName: "hugepages-2048kB"},
},
map[string]string{
"/fakeSysfs/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages": "1",
},
map[string]string{
"/fakeSysfs/devices/system/node/node0/cpu0": "0",
"/fakeSysfs/devices/system/node/node0/cpu1": "0",
"/fakeSysfs/devices/system/node/node0/cpu2": "1",
"/fakeSysfs/devices/system/node/node0/cpu3": "1",
"/fakeSysfs/devices/system/node/node0/cpu4": "2",
"/fakeSysfs/devices/system/node/node0/cpu5": "2",
},
1,
6,
`
[
{
"node_id": 0,
"memory": 33604804608,
"hugepages": [
{
"page_size": 2048,
"num_pages": 1
}
],
"cores": [
{
"core_id": 0,
"thread_ids": [
0,
1
],
"caches": null,
"socket_id": 0
},
{
"core_id": 1,
"thread_ids": [
2,
3
],
"caches": null,
"socket_id": 1
},
{
"core_id": 2,
"thread_ids": [
4,
5
],
"caches": null,
"socket_id": 2
}
],
"caches": [
{
"size": 32768,
"type": "unified",
"level": 3
}
]
}
]
`,
},
}

for _, test := range testCases {
fakeSys := &fakesysfs.FakeSysFs{}
fakeSys.SetCacheInfo(test.cache)
fakeSys.SetNodesPaths(test.nodesPaths, nil)
fakeSys.SetCPUsPaths(test.cpusPaths, nil)
fakeSys.SetCoreThreads(test.coresThreads, nil)
fakeSys.SetMemory(test.memTotal, nil)
fakeSys.SetHugePages(test.hugePages, nil)
fakeSys.SetHugePagesNr(test.hugePageNr, nil)
fakeSys.SetPhysicalPackageIDs(test.physicalPackageIDs, nil)
nodes, cores, err := GetNodesInfo(fakeSys)
assert.Nil(t, err)
assert.Equal(t, test.nodes, len(nodes))
assert.Equal(t, test.cores, cores)

nodesJSON, err := json.Marshal(nodes)
assert.Nil(t, err)
assert.JSONEq(t, test.expectedNodes, string(nodesJSON))
}
}

func TestGetNodesInfoWithOfflineCPUs(t *testing.T) {
Expand Down Expand Up @@ -996,7 +1103,7 @@ func TestGetNodesWhenPhysicalPackageIDMissingForOneCPU(t *testing.T) {
assert.Nil(t, err)

assert.Equal(t, 1, len(nodes))
assert.Equal(t, 2, cores)
assert.Equal(t, 1, cores)

sort.Slice(nodes, func(i, j int) bool {
return nodes[i].Id < nodes[j].Id
Expand All @@ -1019,7 +1126,7 @@ func TestGetNodesWhenPhysicalPackageIDMissingForOneCPU(t *testing.T) {
{
"core_id":0,
"thread_ids":[
0, 1
0
],
"caches":null,
"socket_id":0
Expand Down