Skip to content
This repository has been archived by the owner on Jan 29, 2025. It is now read-only.

Commit

Permalink
fix GAS xe link label usage
Browse files Browse the repository at this point in the history
This changes the label interpretation so that each xe link needs to be
defined in only one direction; the reverse direction is now implied.

Signed-off-by: Ukri Niemimuukko <[email protected]>
  • Loading branch information
uniemimu committed Jun 9, 2023
1 parent 471e539 commit f61f135
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 15 deletions.
25 changes: 18 additions & 7 deletions gpu-aware-scheduling/pkg/gpuscheduler/scheduler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -897,8 +897,8 @@ func TestFilterWithXeLinkedDisabledTiles(t *testing.T) {
Labels: map[string]string{
"gpu.intel.com/gpu-numbers": "0.1.2.3",
"gpu.intel.com/tiles": "4",
xeLinksLabel: "0.0-1.0_1.0-0.0_2.1",
xeLinksLabel + "2": "Z-3.2_3.2-2.1",
xeLinksLabel: "0.0-1.0_2.1",
xeLinksLabel + "2": "Z-3.2",
},
},
Status: v1.NodeStatus{
Expand Down Expand Up @@ -1032,8 +1032,7 @@ func TestRunSchedulingLogicWithMultiContainerXelinkedTileResourceReq(t *testing.
testCases := []testCase{
{
extraLabels: map[string]string{
xeLinksLabel: "0.0-1.0_1.0-0.0",
xeLinksLabel + "2": "Z_2.1-3.2_3.2-2.1",
xeLinksLabel: "0.0-1.0_2.1-3.2",
},
extraAnnotations: map[string]string{xelinkAnnotationName: trueValueString},
description: "4 card xe-linked success case",
Expand All @@ -1044,7 +1043,19 @@ func TestRunSchedulingLogicWithMultiContainerXelinkedTileResourceReq(t *testing.
},
{
extraLabels: map[string]string{
xeLinksLabel: "0.0-1.0_1.0-0.0_2.1-3.2_3.2-2.1",
xeLinksLabel: "0.0-1.0",
xeLinksLabel + "2": "Z_2.1-3.2",
},
extraAnnotations: map[string]string{xelinkAnnotationName: trueValueString},
description: "4 card xe-linked success case",
expectError: false,
expectedCardAnnotation: "card0,card1|card2,card3",
expectTimestamp: true,
defaultTileCheck: true,
},
{
extraLabels: map[string]string{
xeLinksLabel: "0.0-1.0_2.1-3.2",
numaMappingLabel: "0-0.1_1",
numaMappingLabel + "2": "Z-2.3",
},
Expand All @@ -1060,7 +1071,7 @@ func TestRunSchedulingLogicWithMultiContainerXelinkedTileResourceReq(t *testing.
},
{
extraLabels: map[string]string{
xeLinksLabel: "0.0-2.0_2.0-0.0_2.1-3.2_3.2-2.1",
xeLinksLabel: "0.0-2.0_2.1-3.2",
numaMappingLabel: "0-0.1_1-2.3",
},
extraAnnotations: map[string]string{
Expand All @@ -1075,7 +1086,7 @@ func TestRunSchedulingLogicWithMultiContainerXelinkedTileResourceReq(t *testing.
},
{
extraLabels: map[string]string{
xeLinksLabel: "0.0-2.0_2.0-0.0_2.1-3.2_3.2-2.1",
xeLinksLabel: "0.0-2.0_2.1-3.2",
numaMappingLabel: "0-0.1_1-2.3",
},
extraAnnotations: map[string]string{xelinkAnnotationName: trueValueString},
Expand Down
23 changes: 15 additions & 8 deletions gpu-aware-scheduling/pkg/gpuscheduler/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -368,15 +368,22 @@ func getXeLinkedTiles(gpuName string, node *v1.Node) map[int]bool {

for _, linkPair := range xeLinkSlice {
submatches := xeLinkReg.FindStringSubmatch(linkPair)
if len(submatches) == regexXeLinkCount {
if submatches[1] == strconv.Itoa(lZeroDeviceID) {
tileNumber, err := strconv.Atoi(submatches[2])
if err == nil {
xeLinkedTiles[tileNumber] = true
}
}
} else {
if len(submatches) != regexXeLinkCount {
klog.Errorf("Malformed Xe Link label part: %v", linkPair)

return xeLinkedTiles
}

if submatches[1] == strconv.Itoa(lZeroDeviceID) {
tileNumber, err := strconv.Atoi(submatches[2])
if err == nil {
xeLinkedTiles[tileNumber] = true
}
} else if submatches[3] == strconv.Itoa(lZeroDeviceID) {
tileNumber, err := strconv.Atoi(submatches[4])
if err == nil {
xeLinkedTiles[tileNumber] = true
}
}
}

Expand Down

0 comments on commit f61f135

Please sign in to comment.