From e29d3f12f193feab5805bb3ef884e976ccfe8a94 Mon Sep 17 00:00:00 2001 From: Pradyumna Agrawal Date: Mon, 7 Dec 2020 10:10:04 -0800 Subject: [PATCH 1/6] Fix race between `OnDemandHousekeeping` and `housekeepingTick` This fixes the data race between the functions which could be running in different goroutines. Signed-off-by: Pradyumna Agrawal --- manager/container.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/manager/container.go b/manager/container.go index c599300d84..721019df18 100644 --- a/manager/container.go +++ b/manager/container.go @@ -138,7 +138,10 @@ func (cd *containerData) allowErrorLogging() bool { // periodic housekeeping to reset. This should be used sparingly, as calling OnDemandHousekeeping frequently // can have serious performance costs. func (cd *containerData) OnDemandHousekeeping(maxAge time.Duration) { - if cd.clock.Since(cd.statsLastUpdatedTime) > maxAge { + cd.lock.Lock() + timeSinceStatsLastUpdate := cd.clock.Since(statsLastUpdatedTime) + cd.lock.Unock() + if timeSinceStatsLastUpdate > maxAge { housekeepingFinishedChan := make(chan struct{}) cd.onDemandChan <- housekeepingFinishedChan select { @@ -555,6 +558,8 @@ func (cd *containerData) housekeepingTick(timer <-chan time.Time, longHousekeepi klog.V(3).Infof("[%s] Housekeeping took %s", cd.info.Name, duration) } cd.notifyOnDemand() + cd.lock.Lock() + defer cd.lock.Unock() cd.statsLastUpdatedTime = cd.clock.Now() return true } From 89975816ad76a2f40061a5cbf37fc1906d6a97aa Mon Sep 17 00:00:00 2001 From: Pradyumna Agrawal Date: Mon, 7 Dec 2020 10:15:39 -0800 Subject: [PATCH 2/6] Fix typo in previous commit Signed-off-by: Pradyumna Agrawal --- manager/container.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/manager/container.go b/manager/container.go index 721019df18..fef53f4d28 100644 --- a/manager/container.go +++ b/manager/container.go @@ -140,7 +140,7 @@ func (cd *containerData) allowErrorLogging() bool { func (cd *containerData) OnDemandHousekeeping(maxAge time.Duration) { cd.lock.Lock() timeSinceStatsLastUpdate := cd.clock.Since(statsLastUpdatedTime) - cd.lock.Unock() + cd.lock.Unlock() if timeSinceStatsLastUpdate > maxAge { housekeepingFinishedChan := make(chan struct{}) cd.onDemandChan <- housekeepingFinishedChan @@ -559,7 +559,7 @@ func (cd *containerData) housekeepingTick(timer <-chan time.Time, longHousekeepi } cd.notifyOnDemand() cd.lock.Lock() - defer cd.lock.Unock() + defer cd.lock.Unlock() cd.statsLastUpdatedTime = cd.clock.Now() return true } From bed430cb54f81a3b0add03c5efe9e40c21ef17eb Mon Sep 17 00:00:00 2001 From: Pradyumna Agrawal Date: Mon, 7 Dec 2020 10:27:40 -0800 Subject: [PATCH 3/6] cd.clock.Since(statsLastUpdatedTime) -> cd.clock.Since(cd.statsLastUpdatedTime) Signed-off-by: Pradyumna Agrawal --- manager/container.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manager/container.go b/manager/container.go index fef53f4d28..688e3564fd 100644 --- a/manager/container.go +++ b/manager/container.go @@ -139,7 +139,7 @@ func (cd *containerData) allowErrorLogging() bool { // can have serious performance costs. func (cd *containerData) OnDemandHousekeeping(maxAge time.Duration) { cd.lock.Lock() - timeSinceStatsLastUpdate := cd.clock.Since(statsLastUpdatedTime) + timeSinceStatsLastUpdate := cd.clock.Since(cd.statsLastUpdatedTime) cd.lock.Unlock() if timeSinceStatsLastUpdate > maxAge { housekeepingFinishedChan := make(chan struct{}) From ce4c3bae43372979a38dfa3ac42520aa71729899 Mon Sep 17 00:00:00 2001 From: Pradyumna Agrawal Date: Tue, 29 Dec 2020 14:52:06 -0800 Subject: [PATCH 4/6] add a test case Signed-off-by: Pradyumna Agrawal --- manager/container_test.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/manager/container_test.go b/manager/container_test.go index cdb1228ca2..63872ed6ea 100644 --- a/manager/container_test.go +++ b/manager/container_test.go @@ -319,3 +319,34 @@ func TestOnDemandHousekeepingReturnsAfterStopped(t *testing.T) { mockHandler.AssertExpectations(t) } + +func TestOnDemandHousekeepingRace(t *testing.T) { + statsList := itest.GenerateRandomStats(1, 4, 1*time.Second) + stats := statsList[0] + + cd, mockHandler, _, _ := newTestContainerData(t) + mockHandler.On("GetStats").Return(stats, nil) + + wg := sync.WaitGroup{} + wg.Add(1002) + + go func() { + time.Sleep(10 * time.Millisecond) + err := cd.Start() + assert.NoError(t, err) + wg.Done() + }() + + go func() { + t.Log("starting on demand goroutine") + for i := 0; i < 1000; i++ { + go func() { + time.Sleep(1 * time.Microsecond) + cd.OnDemandHousekeeping(0 * time.Millisecond) + wg.Done() + }() + } + wg.Done() + }() + wg.Wait() +} From 5b4077983cf945b1810220d2d200cdcd082f8974 Mon Sep 17 00:00:00 2001 From: "Maciej \"Iwan\" Iwanowski" Date: Fri, 4 Dec 2020 01:16:20 +0100 Subject: [PATCH 5/6] Command may contain spaces Signed-off-by: Maciej "Iwan" Iwanowski --- manager/container.go | 182 ++++++++++++++++++++++---------------- manager/container_test.go | 157 +++++++++++++++++++++++++++++++- 2 files changed, 260 insertions(+), 79 deletions(-) diff --git a/manager/container.go b/manager/container.go index 688e3564fd..62ae5f5145 100644 --- a/manager/container.go +++ b/manager/container.go @@ -38,7 +38,7 @@ import ( "github.com/google/cadvisor/summary" "github.com/google/cadvisor/utils/cpuload" - units "github.com/docker/go-units" + "github.com/docker/go-units" "k8s.io/klog/v2" "k8s.io/utils/clock" ) @@ -286,106 +286,132 @@ func (cd *containerData) getContainerPids(inHostNamespace bool) ([]string, error } func (cd *containerData) GetProcessList(cadvisorContainer string, inHostNamespace bool) ([]v2.ProcessInfo, error) { - // report all processes for root. - isRoot := cd.info.Name == "/" - rootfs := "/" - if !inHostNamespace { - rootfs = "/rootfs" - } format := "user,pid,ppid,stime,pcpu,pmem,rss,vsz,stat,time,comm,psr,cgroup" out, err := cd.getPsOutput(inHostNamespace, format) if err != nil { return nil, err } - expectedFields := 13 + return cd.parseProcessList(cadvisorContainer, inHostNamespace, out) +} + +func (cd *containerData) parseProcessList(cadvisorContainer string, inHostNamespace bool, out []byte) ([]v2.ProcessInfo, error) { + rootfs := "/" + if !inHostNamespace { + rootfs = "/rootfs" + } processes := []v2.ProcessInfo{} lines := strings.Split(string(out), "\n") for _, line := range lines[1:] { - if len(line) == 0 { - continue - } - fields := strings.Fields(line) - if len(fields) < expectedFields { - return nil, fmt.Errorf("expected at least %d fields, found %d: output: %q", expectedFields, len(fields), line) - } - pid, err := strconv.Atoi(fields[1]) + processInfo, err := cd.parsePsLine(line, cadvisorContainer, inHostNamespace) if err != nil { - return nil, fmt.Errorf("invalid pid %q: %v", fields[1], err) + return nil, fmt.Errorf("could not parse line %s: %v", line, err) } - ppid, err := strconv.Atoi(fields[2]) - if err != nil { - return nil, fmt.Errorf("invalid ppid %q: %v", fields[2], err) - } - percentCPU, err := strconv.ParseFloat(fields[4], 32) - if err != nil { - return nil, fmt.Errorf("invalid cpu percent %q: %v", fields[4], err) - } - percentMem, err := strconv.ParseFloat(fields[5], 32) - if err != nil { - return nil, fmt.Errorf("invalid memory percent %q: %v", fields[5], err) - } - rss, err := strconv.ParseUint(fields[6], 0, 64) - if err != nil { - return nil, fmt.Errorf("invalid rss %q: %v", fields[6], err) - } - // convert to bytes - rss *= 1024 - vs, err := strconv.ParseUint(fields[7], 0, 64) - if err != nil { - return nil, fmt.Errorf("invalid virtual size %q: %v", fields[7], err) - } - // convert to bytes - vs *= 1024 - psr, err := strconv.Atoi(fields[11]) - if err != nil { - return nil, fmt.Errorf("invalid pid %q: %v", fields[1], err) - } - - cgroup, err := cd.getCgroupPath(fields[12]) - if err != nil { - return nil, fmt.Errorf("could not parse cgroup path from %q: %v", fields[11], err) - } - // Remove the ps command we just ran from cadvisor container. - // Not necessary, but makes the cadvisor page look cleaner. - if !inHostNamespace && cadvisorContainer == cgroup && fields[10] == "ps" { + if processInfo == nil { continue } - var cgroupPath string - if isRoot { - cgroupPath = cgroup - } var fdCount int - dirPath := path.Join(rootfs, "/proc", strconv.Itoa(pid), "fd") + dirPath := path.Join(rootfs, "/proc", strconv.Itoa(processInfo.Pid), "fd") fds, err := ioutil.ReadDir(dirPath) if err != nil { klog.V(4).Infof("error while listing directory %q to measure fd count: %v", dirPath, err) continue } fdCount = len(fds) - - if isRoot || cd.info.Name == cgroup { - processes = append(processes, v2.ProcessInfo{ - User: fields[0], - Pid: pid, - Ppid: ppid, - StartTime: fields[3], - PercentCpu: float32(percentCPU), - PercentMemory: float32(percentMem), - RSS: rss, - VirtualSize: vs, - Status: fields[8], - RunningTime: fields[9], - Cmd: fields[10], - CgroupPath: cgroupPath, - FdCount: fdCount, - Psr: psr, - }) - } + processInfo.FdCount = fdCount + processes = append(processes, *processInfo) } return processes, nil } +func (cd *containerData) isRoot() bool { + return cd.info.Name == "/" +} + +func (cd *containerData) parsePsLine(line, cadvisorContainer string, inHostNamespace bool) (*v2.ProcessInfo, error) { + const expectedFields = 13 + if len(line) == 0 { + return nil, nil + } + + info := v2.ProcessInfo{} + var err error + + fields := strings.Fields(line) + if len(fields) < expectedFields { + return nil, fmt.Errorf("expected at least %d fields, found %d: output: %q", expectedFields, len(fields), line) + } + info.User = fields[0] + info.StartTime = fields[3] + info.Status = fields[8] + info.RunningTime = fields[9] + + info.Pid, err = strconv.Atoi(fields[1]) + if err != nil { + return nil, fmt.Errorf("invalid pid %q: %v", fields[1], err) + } + info.Ppid, err = strconv.Atoi(fields[2]) + if err != nil { + return nil, fmt.Errorf("invalid ppid %q: %v", fields[2], err) + } + + percentCPU, err := strconv.ParseFloat(fields[4], 32) + if err != nil { + return nil, fmt.Errorf("invalid cpu percent %q: %v", fields[4], err) + } + info.PercentCpu = float32(percentCPU) + percentMem, err := strconv.ParseFloat(fields[5], 32) + if err != nil { + return nil, fmt.Errorf("invalid memory percent %q: %v", fields[5], err) + } + info.PercentMemory = float32(percentMem) + + info.RSS, err = strconv.ParseUint(fields[6], 0, 64) + if err != nil { + return nil, fmt.Errorf("invalid rss %q: %v", fields[6], err) + } + info.VirtualSize, err = strconv.ParseUint(fields[7], 0, 64) + if err != nil { + return nil, fmt.Errorf("invalid virtual size %q: %v", fields[7], err) + } + // convert to bytes + info.RSS *= 1024 + info.VirtualSize *= 1024 + + // According to `man ps`: The following user-defined format specifiers may contain spaces: args, cmd, comm, command, + // fname, ucmd, ucomm, lstart, bsdstart, start. + // Therefore we need to be able to parse comm that consists of multiple space-separated parts. + info.Cmd = strings.Join(fields[10:len(fields)-2], " ") + + // These are last two parts of the line. We create a subslice of `fields` to handle comm that includes spaces. + lastTwoFields := fields[len(fields)-2:] + info.Psr, err = strconv.Atoi(lastTwoFields[0]) + if err != nil { + return nil, fmt.Errorf("invalid psr %q: %v", lastTwoFields[0], err) + } + info.CgroupPath, err = cd.getCgroupPath(lastTwoFields[1]) + if err != nil { + return nil, fmt.Errorf("could not parse cgroup path from %q: %v", lastTwoFields[1], err) + } + + // Remove the ps command we just ran from cadvisor container. + // Not necessary, but makes the cadvisor page look cleaner. + if !inHostNamespace && cadvisorContainer == info.CgroupPath && info.Cmd == "ps" { + return nil, nil + } + + // Do not report processes from other containers when non-root container requested. + if !cd.isRoot() && info.CgroupPath != cd.info.Name { + return nil, nil + } + + // Remove cgroup information when non-root container requested. + if !cd.isRoot() { + info.CgroupPath = "" + } + return &info, nil +} + func newContainerData(containerName string, memoryCache *memory.InMemoryCache, handler container.ContainerHandler, logUsage bool, collectorManager collector.CollectorManager, maxHousekeepingInterval time.Duration, allowDynamicHousekeeping bool, clock clock.Clock) (*containerData, error) { if memoryCache == nil { return nil, fmt.Errorf("nil memory storage") @@ -519,7 +545,7 @@ func (cd *containerData) housekeeping() { usageCPUNs := uint64(0) for i := range stats { if i > 0 { - usageCPUNs += (stats[i].Cpu.Usage.Total - stats[i-1].Cpu.Usage.Total) + usageCPUNs += stats[i].Cpu.Usage.Total - stats[i-1].Cpu.Usage.Total } } usageMemory := stats[numSamples-1].Memory.Usage diff --git a/manager/container_test.go b/manager/container_test.go index 63872ed6ea..6efae33563 100644 --- a/manager/container_test.go +++ b/manager/container_test.go @@ -29,12 +29,14 @@ import ( containertest "github.com/google/cadvisor/container/testing" info "github.com/google/cadvisor/info/v1" itest "github.com/google/cadvisor/info/v1/test" + v2 "github.com/google/cadvisor/info/v2" - "github.com/google/cadvisor/accelerators" "github.com/mindprince/gonvml" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" clock "k8s.io/utils/clock/testing" + + "github.com/google/cadvisor/accelerators" ) const ( @@ -350,3 +352,156 @@ func TestOnDemandHousekeepingRace(t *testing.T) { }() wg.Wait() } + +var psOutput = [][]byte{ + []byte("root 15886 2 23:51 0.1 0.0 0 0 I 00:00:00 kworker/u8:3-ev 3 -\nroot 15887 2 23:51 0.0 0.0 0 0 I< 00:00:00 kworker/1:2H 1 -\nubuntu 15888 1804 23:51 0.0 0.0 2832 10176 R+ 00:00:00 ps 1 8:devices:/user.slice,6:pids:/user.slice/user-1000.slice/session-3.scope,5:blkio:/user.slice,2:cpu,cpuacct:/user.slice,1:na"), + []byte("root 104 2 21:34 0.0 0.0 0 0 I< 00:00:00 kthrotld 3 -\nroot 105 2 21:34 0.0 0.0 0 0 S 00:00:00 irq/41-aerdrv 0 -\nroot 107 2 21:34 0.0 0.0 0 0 I< 00:00:00 DWC Notificatio 3 -\nroot 109 2 21:34 0.0 0.0 0 0 S< 00:00:00 vchiq-slot/0 1 -\nroot 110 2 21:34 0.0 0.0 0 0 S< 00:00:00 vchiq-recy/0 3 -"), +} + +func TestParseProcessList(t *testing.T) { + for i, ps := range psOutput { + t.Run(fmt.Sprintf("iteration %d", i), func(tt *testing.T) { + cd := &containerData{} + _, err := cd.parseProcessList("/", true, ps) + // checking *only* parsing errors - otherwise /proc would have to be emulated. + assert.NoError(tt, err) + }) + } +} + +var psLine = []struct { + name string + line string + cadvisorContainer string + isHostNamespace bool + process *v2.ProcessInfo + err error + cd *containerData +}{ + { + name: "plain process with cgroup", + line: "ubuntu 15888 1804 23:51 0.1 0.0 2832 10176 R+ 00:10:00 cadvisor 1 10:cpuset:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,9:devices:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,8:pids:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,7:memory:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,6:freezer:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,5:perf_event:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,4:blkio:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,3:cpu,cpuacct:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,2:net_cls,net_prio:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,1:name=systemd:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831", + cadvisorContainer: "/docker/cadvisor", + isHostNamespace: true, + process: &v2.ProcessInfo{ + User: "ubuntu", + Pid: 15888, + Ppid: 1804, + StartTime: "23:51", + PercentCpu: 0.1, + PercentMemory: 0.0, + RSS: 2899968, + VirtualSize: 10420224, + Status: "R+", + RunningTime: "00:10:00", + CgroupPath: "/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831", + Cmd: "cadvisor", + Psr: 1, + }, + cd: &containerData{ + info: containerInfo{ContainerReference: info.ContainerReference{Name: "/"}}, + }, + }, + { + name: "process with space in name and no cgroup", + line: "root 107 2 21:34 0.0 0.1 3 4 I< 00:20:00 DWC Notificatio 3 -", + cadvisorContainer: "/docker/cadvisor", + process: &v2.ProcessInfo{ + User: "root", + Pid: 107, + Ppid: 2, + StartTime: "21:34", + PercentCpu: 0.0, + PercentMemory: 0.1, + RSS: 3072, + VirtualSize: 4096, + Status: "I<", + RunningTime: "00:20:00", + CgroupPath: "/", + Cmd: "DWC Notificatio", + Psr: 3, + }, + cd: &containerData{ + info: containerInfo{ContainerReference: info.ContainerReference{Name: "/"}}, + }, + }, + { + name: "process with highly unusual name (one 2 three 4 five 6 eleven), cgroup to be ignored", + line: "root 107 2 21:34 0.0 0.1 3 4 I< 00:20:00 one 2 three 4 five 6 eleven 3 10:cpuset:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,9:devices:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,8:pids:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,7:memory:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,6:freezer:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,5:perf_event:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,4:blkio:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,3:cpu,cpuacct:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,2:net_cls,net_prio:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,1:name=systemd:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831", + cadvisorContainer: "/docker/cadvisor", + isHostNamespace: true, + process: &v2.ProcessInfo{ + User: "root", + Pid: 107, + Ppid: 2, + StartTime: "21:34", + PercentCpu: 0.0, + PercentMemory: 0.1, + RSS: 3072, + VirtualSize: 4096, + Status: "I<", + RunningTime: "00:20:00", + Cmd: "one 2 three 4 five 6 eleven", + Psr: 3, + CgroupPath: "/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831", + }, + cd: &containerData{ + info: containerInfo{ContainerReference: info.ContainerReference{Name: "/"}}, + }, + }, + { + name: "wrong field count", + line: "ps output it is not", + cadvisorContainer: "/docker/cadvisor", + err: fmt.Errorf("expected at least 13 fields, found 5: output: \"ps output it is not\""), + cd: &containerData{}, + }, + { + name: "ps running in cadvisor container should be ignored", + line: "root 107 2 21:34 0.0 0.1 3 4 I< 00:20:00 ps 3 10:cpuset:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,9:devices:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,8:pids:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,7:memory:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,6:freezer:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,5:perf_event:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,4:blkio:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,3:cpu,cpuacct:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,2:net_cls,net_prio:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831,1:name=systemd:/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831", + cadvisorContainer: "/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831", + cd: &containerData{ + info: containerInfo{ContainerReference: info.ContainerReference{Name: "/"}}, + }, + }, + { + name: "non-root container but process belongs to the container", + line: "root 107 2 21:34 0.0 0.1 3 4 I< 00:20:00 sleep inf 3 10:cpuset:/docker/some-random-container,9:devices:/docker/some-random-container,8:pids:/docker/some-random-container,7:memory:/docker/some-random-container,6:freezer:/docker/some-random-container,5:perf_event:/docker/some-random-container,4:blkio:/docker/some-random-container,3:cpu,cpuacct:/docker/some-random-container,2:net_cls,net_prio:/docker/some-random-container,1:name=systemd:/docker/some-random-container", + process: &v2.ProcessInfo{ + User: "root", + Pid: 107, + Ppid: 2, + StartTime: "21:34", + PercentCpu: 0.0, + PercentMemory: 0.1, + RSS: 3072, + VirtualSize: 4096, + Status: "I<", + RunningTime: "00:20:00", + Cmd: "sleep inf", + Psr: 3, + }, + cadvisorContainer: "/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831", + cd: &containerData{ + info: containerInfo{ContainerReference: info.ContainerReference{Name: "/docker/some-random-container"}}, + }, + }, + { + name: "non-root container and process belonging to another container", + line: "root 107 2 21:34 0.0 0.1 3 4 I< 00:20:00 sleep inf 3 10:cpuset:/docker/some-random-container,9:devices:/docker/some-random-container,8:pids:/docker/some-random-container,7:memory:/docker/some-random-container,6:freezer:/docker/some-random-container,5:perf_event:/docker/some-random-container,4:blkio:/docker/some-random-container,3:cpu,cpuacct:/docker/some-random-container,2:net_cls,net_prio:/docker/some-random-container,1:name=systemd:/docker/some-random-container", + cadvisorContainer: "/docker/dd479c33249f6c3f0f1189aa88f07dad3eeb3e6fedfc71385c27ddd699994831", + cd: &containerData{ + info: containerInfo{ContainerReference: info.ContainerReference{Name: "/docker/some-other-container"}}, + }, + }, +} + +func TestParsePsLine(t *testing.T) { + for _, ps := range psLine { + t.Run(ps.name, func(tt *testing.T) { + process, err := ps.cd.parsePsLine(ps.line, ps.cadvisorContainer, ps.isHostNamespace) + assert.Equal(tt, ps.err, err) + assert.EqualValues(tt, ps.process, process) + }) + } +} From 795b605a70b6f3635c9114f242d8037cff036004 Mon Sep 17 00:00:00 2001 From: "Maciej \"Iwan\" Iwanowski" Date: Tue, 29 Dec 2020 12:49:24 +0100 Subject: [PATCH 6/6] Memory cgroup is not available on some systems Signed-off-by: Maciej "Iwan" Iwanowski --- manager/container.go | 44 +++++++++++++++++++-------------- manager/container_test.go | 52 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 18 deletions(-) diff --git a/manager/container.go b/manager/container.go index 62ae5f5145..c776e10d64 100644 --- a/manager/container.go +++ b/manager/container.go @@ -47,9 +47,14 @@ import ( var enableLoadReader = flag.Bool("enable_load_reader", false, "Whether to enable cpu load reader") var HousekeepingInterval = flag.Duration("housekeeping_interval", 1*time.Second, "Interval between container housekeepings") +// TODO: replace regular expressions with something simpler, such as strings.Split(). // cgroup type chosen to fetch the cgroup path of a process. -// Memory has been chosen, as it is one of the default cgroups that is enabled for most containers. -var cgroupPathRegExp = regexp.MustCompile(`memory[^:]*:(.*?)[,;$]`) +// Memory has been chosen, as it is one of the default cgroups that is enabled for most containers... +var cgroupMemoryPathRegExp = regexp.MustCompile(`memory[^:]*:(.*?)[,;$]`) + +// ... but there are systems (e.g. Raspberry Pi 4) where memory cgroup controller is disabled by default. +// We should check cpu cgroup then. +var cgroupCPUPathRegExp = regexp.MustCompile(`cpu[^:]*:(.*?)[,;$]`) type containerInfo struct { info.ContainerReference @@ -198,20 +203,28 @@ func (cd *containerData) DerivedStats() (v2.DerivedStats, error) { return cd.summaryReader.DerivedStats() } -func (cd *containerData) getCgroupPath(cgroups string) (string, error) { +func (cd *containerData) getCgroupPath(cgroups string) string { if cgroups == "-" { - return "/", nil + return "/" } if strings.HasPrefix(cgroups, "0::") { - return cgroups[3:], nil + return cgroups[3:] } - matches := cgroupPathRegExp.FindSubmatch([]byte(cgroups)) + matches := cgroupMemoryPathRegExp.FindSubmatch([]byte(cgroups)) if len(matches) != 2 { - klog.V(3).Infof("failed to get memory cgroup path from %q", cgroups) - // return root in case of failures - memory hierarchy might not be enabled. - return "/", nil + klog.V(3).Infof( + "failed to get memory cgroup path from %q, will try to get cpu cgroup path", + cgroups, + ) + // On some systems (e.g. Raspberry PI 4) cgroup memory controlled is disabled by default. + matches = cgroupCPUPathRegExp.FindSubmatch([]byte(cgroups)) + if len(matches) != 2 { + klog.V(3).Infof("failed to get cpu cgroup path from %q; assuming root cgroup", cgroups) + // return root in case of failures - memory hierarchy might not be enabled. + return "/" + } } - return string(matches[1]), nil + return string(matches[1]) } // Returns contents of a file inside the container root. @@ -274,10 +287,7 @@ func (cd *containerData) getContainerPids(inHostNamespace bool) ([]string, error return nil, fmt.Errorf("expected at least %d fields, found %d: output: %q", expectedFields, len(fields), line) } pid := fields[0] - cgroup, err := cd.getCgroupPath(fields[1]) - if err != nil { - return nil, fmt.Errorf("could not parse cgroup path from %q: %v", fields[1], err) - } + cgroup := cd.getCgroupPath(fields[1]) if cd.info.Name == cgroup { pids = append(pids, pid) } @@ -319,6 +329,7 @@ func (cd *containerData) parseProcessList(cadvisorContainer string, inHostNamesp } fdCount = len(fds) processInfo.FdCount = fdCount + processes = append(processes, *processInfo) } return processes, nil @@ -389,10 +400,7 @@ func (cd *containerData) parsePsLine(line, cadvisorContainer string, inHostNames if err != nil { return nil, fmt.Errorf("invalid psr %q: %v", lastTwoFields[0], err) } - info.CgroupPath, err = cd.getCgroupPath(lastTwoFields[1]) - if err != nil { - return nil, fmt.Errorf("could not parse cgroup path from %q: %v", lastTwoFields[1], err) - } + info.CgroupPath = cd.getCgroupPath(lastTwoFields[1]) // Remove the ps command we just ran from cadvisor container. // Not necessary, but makes the cadvisor page look cleaner. diff --git a/manager/container_test.go b/manager/container_test.go index 6efae33563..6460c6ad47 100644 --- a/manager/container_test.go +++ b/manager/container_test.go @@ -505,3 +505,55 @@ func TestParsePsLine(t *testing.T) { }) } } + +var cgroupCases = []struct { + name string + cgroups string + path string +}{ + { + name: "no cgroup", + cgroups: "-", + path: "/", + }, + { + name: "random and meaningless string", + cgroups: "/this/is/a/path/to/some.file", + path: "/", + }, + { + name: "0::-type cgroup", + cgroups: "0::/docker/some-cgroup", + path: "/docker/some-cgroup", + }, + { + name: "memory cgroup", + cgroups: "4:memory:/docker/09c89cd48b3597db904ab8e6920fef2cbf93588d037d9613ce362e25188f8ec6,2:net_cls:/docker/09c89cd48b3597db904ab8e6920fef2cbf93588d037d9613ce362e25188f8ec6", + path: "/docker/09c89cd48b3597db904ab8e6920fef2cbf93588d037d9613ce362e25188f8ec6", + }, + { + name: "cpu,cpuacct cgroup", + cgroups: "4:cpu,cpuacct:/docker/09c89cd48b3597db904ab8e6920fef2cbf93588d037d9613ce362e25188f8ec6,2:net_cls:/docker/09c89cd48b3597db904ab8e6920fef2cbf93588d037d9613ce362e25188f8ec6", + path: "/docker/09c89cd48b3597db904ab8e6920fef2cbf93588d037d9613ce362e25188f8ec6", + }, + { + name: "cpu cgroup", + cgroups: "4:cpu:/docker/09c89cd48b3597db904ab8e6920fef2cbf93588d037d9613ce362e25188f8ec6,2:net_cls:/docker/09c89cd48b3597db904ab8e6920fef2cbf93588d037d9613ce362e25188f8ec6", + path: "/docker/09c89cd48b3597db904ab8e6920fef2cbf93588d037d9613ce362e25188f8ec6", + }, + { + name: "cpuacct cgroup", + cgroups: "4:cpuacct:/docker/09c89cd48b3597db904ab8e6920fef2cbf93588d037d9613ce362e25188f8ec6,2:net_cls:/docker/09c89cd48b3597db904ab8e6920fef2cbf93588d037d9613ce362e25188f8ec6", + path: "/docker/09c89cd48b3597db904ab8e6920fef2cbf93588d037d9613ce362e25188f8ec6", + }, +} + +func TestGetCgroupPath(t *testing.T) { + for _, cgroup := range cgroupCases { + t.Run(cgroup.name, func(tt *testing.T) { + cd := &containerData{} + path := cd.getCgroupPath(cgroup.cgroups) + assert.Equal(t, cgroup.path, path) + }) + } +}