From 7d914b5b7a3c5cdc078853966c70dd6079289123 Mon Sep 17 00:00:00 2001 From: "Wisniewski, Krzysztof2" Date: Fri, 18 Sep 2020 12:03:42 +0200 Subject: [PATCH] Add cgroup_memory_migrate metric Signed-off-by: Wisniewski, Krzysztof2 --- cmd/cadvisor.go | 4 +++- cmd/cadvisor_test.go | 1 + container/factory.go | 2 ++ container/libcontainer/handler.go | 7 +++++++ docs/runtime_options.md | 2 +- docs/storage/prometheus.md | 1 + info/v1/container.go | 6 ++++++ metrics/prometheus.go | 10 ++++++++++ metrics/prometheus_fake.go | 1 + metrics/testdata/prometheus_metrics | 3 +++ 10 files changed, 35 insertions(+), 2 deletions(-) diff --git a/cmd/cadvisor.go b/cmd/cadvisor.go index ade2d22829..80f76c51ed 100644 --- a/cmd/cadvisor.go +++ b/cmd/cadvisor.go @@ -91,6 +91,7 @@ var ( container.ReferencedMemoryMetrics: struct{}{}, container.CPUTopologyMetrics: struct{}{}, container.ResctrlMetrics: struct{}{}, + container.CPUSetMetrics: struct{}{}, }} // List of metrics that can be ignored. @@ -110,6 +111,7 @@ var ( container.ReferencedMemoryMetrics: struct{}{}, container.CPUTopologyMetrics: struct{}{}, container.ResctrlMetrics: struct{}{}, + container.CPUSetMetrics: struct{}{}, } ) @@ -141,7 +143,7 @@ func (ml *metricSetValue) Set(value string) error { } func init() { - flag.Var(&ignoreMetrics, "disable_metrics", "comma-separated list of `metrics` to be disabled. Options are 'accelerator', 'cpu_topology','disk', 'diskIO', 'memory_numa', 'network', 'tcp', 'udp', 'percpu', 'sched', 'process', 'hugetlb', 'referenced_memory', 'resctrl'.") + flag.Var(&ignoreMetrics, "disable_metrics", "comma-separated list of `metrics` to be disabled. 
Options are 'accelerator', 'cpu_topology','disk', 'diskIO', 'memory_numa', 'network', 'tcp', 'udp', 'percpu', 'sched', 'process', 'hugetlb', 'referenced_memory', 'resctrl', 'cpuset'.") // Default logging verbosity to V(2) flag.Set("v", "2") diff --git a/cmd/cadvisor_test.go b/cmd/cadvisor_test.go index 093a348118..1e22dfd9ad 100644 --- a/cmd/cadvisor_test.go +++ b/cmd/cadvisor_test.go @@ -108,6 +108,7 @@ func TestToIncludedMetrics(t *testing.T) { container.ReferencedMemoryMetrics: struct{}{}, container.CPUTopologyMetrics: struct{}{}, container.ResctrlMetrics: struct{}{}, + container.CPUSetMetrics: struct{}{}, }, container.AllMetrics, {}, diff --git a/container/factory.go b/container/factory.go index 652070b1b4..56d198976e 100644 --- a/container/factory.go +++ b/container/factory.go @@ -63,6 +63,7 @@ const ( ReferencedMemoryMetrics MetricKind = "referenced_memory" CPUTopologyMetrics MetricKind = "cpu_topology" ResctrlMetrics MetricKind = "resctrl" + CPUSetMetrics MetricKind = "cpuset" ) // AllMetrics represents all kinds of metrics that cAdvisor supported. 
@@ -87,6 +88,7 @@ var AllMetrics = MetricSet{ ReferencedMemoryMetrics: struct{}{}, CPUTopologyMetrics: struct{}{}, ResctrlMetrics: struct{}{}, + CPUSetMetrics: struct{}{}, } func (mk MetricKind) String() string { diff --git a/container/libcontainer/handler.go b/container/libcontainer/handler.go index 1094b10392..ce1ef040aa 100644 --- a/container/libcontainer/handler.go +++ b/container/libcontainer/handler.go @@ -870,6 +870,10 @@ func setMemoryStats(s *cgroups.Stats, ret *info.ContainerStats) { ret.Memory.WorkingSet = workingSet } +func setCPUSetStats(s *cgroups.Stats, ret *info.ContainerStats) { + ret.CpuSet.MemoryMigrate = s.CPUSetStats.MemoryMigrate +} + func getNumaStats(memoryStats map[uint8]uint64) map[uint8]uint64 { stats := make(map[uint8]uint64, len(memoryStats)) for node, usage := range memoryStats { @@ -947,6 +951,9 @@ func newContainerStats(libcontainerStats *libcontainer.Stats, includedMetrics co if includedMetrics.Has(container.HugetlbUsageMetrics) { setHugepageStats(s, ret) } + if includedMetrics.Has(container.CPUSetMetrics) { + setCPUSetStats(s, ret) + } } if len(libcontainerStats.Interfaces) > 0 { setNetworkStats(libcontainerStats, ret) diff --git a/docs/runtime_options.md b/docs/runtime_options.md index af9b5cb831..ad8234b489 100644 --- a/docs/runtime_options.md +++ b/docs/runtime_options.md @@ -125,7 +125,7 @@ cAdvisor stores the latest historical data in memory. How long of a history it s --application_metrics_count_limit=100: Max number of application metrics to store (per container) (default 100) --collector_cert="": Collector's certificate, exposed to endpoints for certificate based authentication. --collector_key="": Key for the collector's certificate ---disable_metrics=tcp,advtcp,udp,sched,process,hugetlb: comma-separated list of metrics to be disabled. Options are 'disk', 'network', 'tcp', 'advtcp', 'udp', 'sched', 'process', 'hugetlb'. Note: tcp and udp are disabled by default due to high CPU usage. 
(default tcp,advtcp,udp,sched,process,hugetlb) +--disable_metrics=tcp,advtcp,udp,sched,process,hugetlb,cpuset: comma-separated list of metrics to be disabled. Options are 'disk', 'network', 'tcp', 'advtcp', 'udp', 'sched', 'process', 'hugetlb', 'cpuset'. Note: tcp and udp are disabled by default due to high CPU usage. (default tcp,advtcp,udp,sched,process,hugetlb,cpuset) --prometheus_endpoint="/metrics": Endpoint to expose Prometheus metrics on (default "/metrics") --disable_root_cgroup_stats=false: Disable collecting root Cgroup stats ``` diff --git a/docs/storage/prometheus.md b/docs/storage/prometheus.md index 36a847b228..bdb8b42f04 100644 --- a/docs/storage/prometheus.md +++ b/docs/storage/prometheus.md @@ -64,6 +64,7 @@ Metric name | Type | Description | Unit (where applicable) | -disable_metrics pa `container_memory_rss` | Gauge | Size of RSS | bytes | | `container_memory_swap` | Gauge | Container swap usage | bytes | | `container_memory_mapped_file` | Gauge | Size of memory mapped files | bytes | | +`container_memory_migrate` | Gauge | Memory migrate status | | cpuset | `container_memory_usage_bytes` | Gauge | Current memory usage, including all memory regardless of when it was accessed | bytes | | `container_memory_working_set_bytes` | Gauge | Current working set | bytes | | `container_network_receive_bytes_total` | Counter | Cumulative count of bytes received | bytes | network | diff --git a/info/v1/container.go b/info/v1/container.go index 08cff3940f..9fe04fddea 100644 --- a/info/v1/container.go +++ b/info/v1/container.go @@ -399,6 +399,10 @@ type MemoryStats struct { HierarchicalData MemoryStatsMemoryData `json:"hierarchical_data,omitempty"` } +type CPUSetStats struct { + MemoryMigrate uint64 `json:"memory_migrate"` +} + type MemoryNumaStats struct { File map[uint8]uint64 `json:"file,omitempty"` Anon map[uint8]uint64 `json:"anon,omitempty"` @@ -957,6 +961,8 @@ type ContainerStats struct { // Resource Control (resctrl) statistics Resctrl ResctrlStats 
`json:"resctrl,omitempty"` + + CpuSet CPUSetStats `json:"cpuset,omitempty"` } func timeEq(t1, t2 time.Time, tolerance time.Duration) bool { diff --git a/metrics/prometheus.go b/metrics/prometheus.go index 1064e045a2..5dea95e60b 100644 --- a/metrics/prometheus.go +++ b/metrics/prometheus.go @@ -455,6 +455,16 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri }, }...) } + if includedMetrics.Has(container.CPUSetMetrics) { + c.containerMetrics = append(c.containerMetrics, containerMetric{ + name: "container_memory_migrate", + help: "Memory migrate status.", + valueType: prometheus.GaugeValue, + getValues: func(s *info.ContainerStats) metricValues { + return metricValues{{value: float64(s.CpuSet.MemoryMigrate), timestamp: s.Timestamp}} + }, + }) + } if includedMetrics.Has(container.MemoryNumaMetrics) { c.containerMetrics = append(c.containerMetrics, []containerMetric{ { diff --git a/metrics/prometheus_fake.go b/metrics/prometheus_fake.go index 6368c0b75e..8ff03e797c 100644 --- a/metrics/prometheus_fake.go +++ b/metrics/prometheus_fake.go @@ -708,6 +708,7 @@ func (p testSubcontainersInfoProvider) GetRequestedContainersInfo(string, v2.Req }, }, }, + CpuSet: info.CPUSetStats{MemoryMigrate: 1}, }, }, }, diff --git a/metrics/testdata/prometheus_metrics b/metrics/testdata/prometheus_metrics index d8ba128f84..190268c56a 100644 --- a/metrics/testdata/prometheus_metrics +++ b/metrics/testdata/prometheus_metrics @@ -152,6 +152,9 @@ container_memory_mapped_file{container_env_foo_env="prod",container_label_foo_la # HELP container_memory_max_usage_bytes Maximum memory usage recorded in bytes # TYPE container_memory_max_usage_bytes gauge container_memory_max_usage_bytes{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 8 1395066363000 +# HELP container_memory_migrate Memory migrate status. 
+# TYPE container_memory_migrate gauge +container_memory_migrate{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1 1395066363000 # HELP container_memory_numa_pages Number of used pages per NUMA node # TYPE container_memory_numa_pages gauge container_memory_numa_pages{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",node="0",scope="container",type="anon",zone_name="hello"} 10000 1395066363000