Skip to content

Commit

Permalink
Merge pull request #763 from luomingmeng/dev/alloc-and-get-hints-fail…
Browse files Browse the repository at this point in the history
…ed-add-resize-tag

chore(qrm): qrm cpu/memory plugin alloc_failed and get_topology_hints…
  • Loading branch information
luomingmeng authored Jan 22, 2025
2 parents fe23afb + 6d52a09 commit fb6d1d8
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 4 deletions.
9 changes: 7 additions & 2 deletions pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package dynamicpolicy
import (
"context"
"fmt"
"strconv"
"sync"
"time"

Expand Down Expand Up @@ -695,13 +696,16 @@ func (p *DynamicPolicy) GetTopologyHints(ctx context.Context,
defer func() {
p.RUnlock()
if err != nil {
inplaceUpdateResizing := util.PodInplaceUpdateResizing(req)
_ = p.emitter.StoreInt64(util.MetricNameGetTopologyHintsFailed, 1, metrics.MetricTypeNameRaw,
metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(err)})
metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(err)},
metrics.MetricTag{Key: util.MetricTagNameInplaceUpdateResizing, Val: strconv.FormatBool(inplaceUpdateResizing)})

general.ErrorS(err, "GetTopologyHints failed",
"podNamespace", req.PodNamespace,
"podName", req.PodName,
"containerName", req.ContainerName,
"inplaceUpdateResizing", inplaceUpdateResizing,
)
}
general.InfoS("finished",
Expand Down Expand Up @@ -844,7 +848,8 @@ func (p *DynamicPolicy) Allocate(ctx context.Context,
} else if respErr != nil {
_ = p.removeContainer(req.PodUid, req.ContainerName)
_ = p.emitter.StoreInt64(util.MetricNameAllocateFailed, 1, metrics.MetricTypeNameRaw,
metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(respErr)})
metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(respErr)},
metrics.MetricTag{Key: util.MetricTagNameInplaceUpdateResizing, Val: strconv.FormatBool(util.PodInplaceUpdateResizing(req))})
}

p.Unlock()
Expand Down
9 changes: 7 additions & 2 deletions pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"errors"
"fmt"
"strconv"
"sync"
"time"

Expand Down Expand Up @@ -585,12 +586,15 @@ func (p *DynamicPolicy) GetTopologyHints(ctx context.Context,
defer func() {
p.RUnlock()
if err != nil {
inplaceUpdateResizing := util.PodInplaceUpdateResizing(req)
_ = p.emitter.StoreInt64(util.MetricNameGetTopologyHintsFailed, 1, metrics.MetricTypeNameRaw,
metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(err)})
metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(err)},
metrics.MetricTag{Key: util.MetricTagNameInplaceUpdateResizing, Val: strconv.FormatBool(inplaceUpdateResizing)})
general.ErrorS(err, "GetTopologyHints failed",
"podNamespace", req.PodNamespace,
"podName", req.PodName,
"containerName", req.ContainerName,
"inplaceUpdateResizing", inplaceUpdateResizing,
)
}
general.InfoS("finished",
Expand Down Expand Up @@ -949,7 +953,8 @@ func (p *DynamicPolicy) Allocate(ctx context.Context,
} else if respErr != nil {
_ = p.removeContainer(req.PodUid, req.ContainerName)
_ = p.emitter.StoreInt64(util.MetricNameAllocateFailed, 1, metrics.MetricTypeNameRaw,
metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(respErr)})
metrics.MetricTag{Key: "error_message", Val: metric.MetricTagValueFormat(respErr)},
metrics.MetricTag{Key: util.MetricTagNameInplaceUpdateResizing, Val: strconv.FormatBool(util.PodInplaceUpdateResizing(req))})
}

p.Unlock()
Expand Down
4 changes: 4 additions & 0 deletions pkg/agent/qrm-plugins/util/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ const (
MetricNameShareCoresNoEnoughResourceFailed = "share_cores_no_enough_resource"
)

// MetricTagNameInplaceUpdateResizing is the metric tag key under which QRM
// plugins report, as a "true"/"false" string, whether a failed request was
// for a pod undergoing an in-place update resize.
const MetricTagNameInplaceUpdateResizing = "inplaceUpdateResizing"

// those are OCI property names to be used by QRM plugins
const (
OCIPropertyNameCPUSetCPUs = "CpusetCpus"
Expand Down

0 comments on commit fb6d1d8

Please sign in to comment.