Backporting #2750, #2755 and #2770 to 0.38 (#2815)
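This PR backports three changes from master to the 0.38 release branch: a data-race fix that guards statsLastUpdatedTime with cd.lock in both OnDemandHousekeeping and housekeepingTick, a refactor of GetProcessList into parseProcessList and parsePsLine that also handles comm values containing spaces, and a fallback to the cpu cgroup path on systems where the memory cgroup controller is disabled (e.g. Raspberry Pi 4).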

Closed · wants to merge 6 commits
225 changes: 132 additions & 93 deletions manager/container.go
@@ -38,7 +38,7 @@ import (
     "github.com/google/cadvisor/summary"
     "github.com/google/cadvisor/utils/cpuload"
 
-    units "github.com/docker/go-units"
+    "github.com/docker/go-units"
     "k8s.io/klog/v2"
     "k8s.io/utils/clock"
 )
@@ -47,9 +47,14 @@ import (
 var enableLoadReader = flag.Bool("enable_load_reader", false, "Whether to enable cpu load reader")
 var HousekeepingInterval = flag.Duration("housekeeping_interval", 1*time.Second, "Interval between container housekeepings")
 
+// TODO: replace regular expressions with something simpler, such as strings.Split().
 // cgroup type chosen to fetch the cgroup path of a process.
-// Memory has been chosen, as it is one of the default cgroups that is enabled for most containers.
-var cgroupPathRegExp = regexp.MustCompile(`memory[^:]*:(.*?)[,;$]`)
+// Memory has been chosen, as it is one of the default cgroups that is enabled for most containers...
+var cgroupMemoryPathRegExp = regexp.MustCompile(`memory[^:]*:(.*?)[,;$]`)
+
+// ... but there are systems (e.g. Raspberry Pi 4) where the memory cgroup controller is disabled by default.
+// We should check the cpu cgroup then.
+var cgroupCPUPathRegExp = regexp.MustCompile(`cpu[^:]*:(.*?)[,;$]`)
 
 type containerInfo struct {
     info.ContainerReference
Expand Down Expand Up @@ -138,7 +143,10 @@ func (cd *containerData) allowErrorLogging() bool {
// periodic housekeeping to reset. This should be used sparingly, as calling OnDemandHousekeeping frequently
// can have serious performance costs.
func (cd *containerData) OnDemandHousekeeping(maxAge time.Duration) {
if cd.clock.Since(cd.statsLastUpdatedTime) > maxAge {
cd.lock.Lock()
timeSinceStatsLastUpdate := cd.clock.Since(cd.statsLastUpdatedTime)
cd.lock.Unlock()
if timeSinceStatsLastUpdate > maxAge {
housekeepingFinishedChan := make(chan struct{})
cd.onDemandChan <- housekeepingFinishedChan
select {
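
Note: the matching write side of this fix appears in the housekeepingTick hunk at the bottom of the diff, so every access to statsLastUpdatedTime now happens under cd.lock. A minimal standalone sketch of the pattern (names simplified; this is not the cAdvisor API):

    package main

    import (
        "fmt"
        "sync"
        "time"
    )

    // container mirrors the relevant fields of containerData.
    type container struct {
        lock                 sync.Mutex
        statsLastUpdatedTime time.Time
    }

    // stale takes the lock only around the read, as OnDemandHousekeeping now does.
    func (c *container) stale(maxAge time.Duration) bool {
        c.lock.Lock()
        since := time.Since(c.statsLastUpdatedTime)
        c.lock.Unlock()
        return since > maxAge
    }

    // touch mirrors the write added to housekeepingTick.
    func (c *container) touch() {
        c.lock.Lock()
        defer c.lock.Unlock()
        c.statsLastUpdatedTime = time.Now()
    }

    func main() {
        c := &container{}
        done := make(chan struct{})
        go func() {
            defer close(done)
            for i := 0; i < 1000; i++ {
                c.touch() // concurrent writer, like the housekeeping loop
            }
        }()
        fmt.Println(c.stale(time.Second)) // concurrent reader; clean under -race
        <-done
    }
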
Expand Down Expand Up @@ -195,20 +203,28 @@ func (cd *containerData) DerivedStats() (v2.DerivedStats, error) {
return cd.summaryReader.DerivedStats()
}

func (cd *containerData) getCgroupPath(cgroups string) (string, error) {
func (cd *containerData) getCgroupPath(cgroups string) string {
if cgroups == "-" {
return "/", nil
return "/"
}
if strings.HasPrefix(cgroups, "0::") {
return cgroups[3:], nil
return cgroups[3:]
}
matches := cgroupPathRegExp.FindSubmatch([]byte(cgroups))
matches := cgroupMemoryPathRegExp.FindSubmatch([]byte(cgroups))
if len(matches) != 2 {
klog.V(3).Infof("failed to get memory cgroup path from %q", cgroups)
// return root in case of failures - memory hierarchy might not be enabled.
return "/", nil
}
return string(matches[1]), nil
klog.V(3).Infof(
"failed to get memory cgroup path from %q, will try to get cpu cgroup path",
cgroups,
)
// On some systems (e.g. Raspberry PI 4) cgroup memory controlled is disabled by default.
matches = cgroupCPUPathRegExp.FindSubmatch([]byte(cgroups))
if len(matches) != 2 {
klog.V(3).Infof("failed to get cpu cgroup path from %q; assuming root cgroup", cgroups)
// return root in case of failures - memory hierarchy might not be enabled.
return "/"
}
}
return string(matches[1])
}

// Returns contents of a file inside the container root.
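
For context, the new lookup order can be exercised outside cAdvisor; the sample cgroup field values below are invented for illustration:

    package main

    import (
        "fmt"
        "regexp"
        "strings"
    )

    var (
        cgroupMemoryPathRegExp = regexp.MustCompile(`memory[^:]*:(.*?)[,;$]`)
        cgroupCPUPathRegExp    = regexp.MustCompile(`cpu[^:]*:(.*?)[,;$]`)
    )

    // getCgroupPath reproduces the lookup order of the patched method.
    func getCgroupPath(cgroups string) string {
        if cgroups == "-" {
            return "/"
        }
        if strings.HasPrefix(cgroups, "0::") { // cgroup v2 unified hierarchy
            return cgroups[3:]
        }
        matches := cgroupMemoryPathRegExp.FindSubmatch([]byte(cgroups))
        if len(matches) != 2 {
            // Memory controller absent (e.g. Raspberry Pi 4): fall back to cpu.
            matches = cgroupCPUPathRegExp.FindSubmatch([]byte(cgroups))
            if len(matches) != 2 {
                return "/"
            }
        }
        return string(matches[1])
    }

    func main() {
        fmt.Println(getCgroupPath("4:memory:/docker/abc,2:cpu,cpuacct:/docker/abc,")) // /docker/abc
        fmt.Println(getCgroupPath("2:cpu,cpuacct:/docker/abc,"))                      // /docker/abc (memory disabled)
        fmt.Println(getCgroupPath("0::/system.slice/docker.service"))                 // /system.slice/docker.service
        fmt.Println(getCgroupPath("-"))                                               // /
    }
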
Expand Down Expand Up @@ -271,10 +287,7 @@ func (cd *containerData) getContainerPids(inHostNamespace bool) ([]string, error
return nil, fmt.Errorf("expected at least %d fields, found %d: output: %q", expectedFields, len(fields), line)
}
pid := fields[0]
cgroup, err := cd.getCgroupPath(fields[1])
if err != nil {
return nil, fmt.Errorf("could not parse cgroup path from %q: %v", fields[1], err)
}
cgroup := cd.getCgroupPath(fields[1])
if cd.info.Name == cgroup {
pids = append(pids, pid)
}
@@ -283,106 +296,130 @@ func (cd *containerData) getContainerPids(inHostNamespace bool) ([]string, error) {
 }
 
 func (cd *containerData) GetProcessList(cadvisorContainer string, inHostNamespace bool) ([]v2.ProcessInfo, error) {
-    // report all processes for root.
-    isRoot := cd.info.Name == "/"
-    rootfs := "/"
-    if !inHostNamespace {
-        rootfs = "/rootfs"
-    }
     format := "user,pid,ppid,stime,pcpu,pmem,rss,vsz,stat,time,comm,psr,cgroup"
     out, err := cd.getPsOutput(inHostNamespace, format)
     if err != nil {
         return nil, err
     }
-    expectedFields := 13
+    return cd.parseProcessList(cadvisorContainer, inHostNamespace, out)
+}
+
+func (cd *containerData) parseProcessList(cadvisorContainer string, inHostNamespace bool, out []byte) ([]v2.ProcessInfo, error) {
+    rootfs := "/"
+    if !inHostNamespace {
+        rootfs = "/rootfs"
+    }
     processes := []v2.ProcessInfo{}
     lines := strings.Split(string(out), "\n")
     for _, line := range lines[1:] {
-        if len(line) == 0 {
-            continue
-        }
-        fields := strings.Fields(line)
-        if len(fields) < expectedFields {
-            return nil, fmt.Errorf("expected at least %d fields, found %d: output: %q", expectedFields, len(fields), line)
-        }
-        pid, err := strconv.Atoi(fields[1])
-        if err != nil {
-            return nil, fmt.Errorf("invalid pid %q: %v", fields[1], err)
-        }
-        ppid, err := strconv.Atoi(fields[2])
-        if err != nil {
-            return nil, fmt.Errorf("invalid ppid %q: %v", fields[2], err)
-        }
-        percentCPU, err := strconv.ParseFloat(fields[4], 32)
-        if err != nil {
-            return nil, fmt.Errorf("invalid cpu percent %q: %v", fields[4], err)
-        }
-        percentMem, err := strconv.ParseFloat(fields[5], 32)
-        if err != nil {
-            return nil, fmt.Errorf("invalid memory percent %q: %v", fields[5], err)
-        }
-        rss, err := strconv.ParseUint(fields[6], 0, 64)
-        if err != nil {
-            return nil, fmt.Errorf("invalid rss %q: %v", fields[6], err)
-        }
-        // convert to bytes
-        rss *= 1024
-        vs, err := strconv.ParseUint(fields[7], 0, 64)
-        if err != nil {
-            return nil, fmt.Errorf("invalid virtual size %q: %v", fields[7], err)
-        }
-        // convert to bytes
-        vs *= 1024
-        psr, err := strconv.Atoi(fields[11])
-        if err != nil {
-            return nil, fmt.Errorf("invalid pid %q: %v", fields[1], err)
-        }

cgroup, err := cd.getCgroupPath(fields[12])
processInfo, err := cd.parsePsLine(line, cadvisorContainer, inHostNamespace)
if err != nil {
return nil, fmt.Errorf("could not parse cgroup path from %q: %v", fields[11], err)
return nil, fmt.Errorf("could not parse line %s: %v", line, err)
}
// Remove the ps command we just ran from cadvisor container.
// Not necessary, but makes the cadvisor page look cleaner.
if !inHostNamespace && cadvisorContainer == cgroup && fields[10] == "ps" {
if processInfo == nil {
continue
}
var cgroupPath string
if isRoot {
cgroupPath = cgroup
}

var fdCount int
dirPath := path.Join(rootfs, "/proc", strconv.Itoa(pid), "fd")
dirPath := path.Join(rootfs, "/proc", strconv.Itoa(processInfo.Pid), "fd")
fds, err := ioutil.ReadDir(dirPath)
if err != nil {
klog.V(4).Infof("error while listing directory %q to measure fd count: %v", dirPath, err)
continue
}
fdCount = len(fds)
processInfo.FdCount = fdCount

if isRoot || cd.info.Name == cgroup {
processes = append(processes, v2.ProcessInfo{
User: fields[0],
Pid: pid,
Ppid: ppid,
StartTime: fields[3],
PercentCpu: float32(percentCPU),
PercentMemory: float32(percentMem),
RSS: rss,
VirtualSize: vs,
Status: fields[8],
RunningTime: fields[9],
Cmd: fields[10],
CgroupPath: cgroupPath,
FdCount: fdCount,
Psr: psr,
})
}
processes = append(processes, *processInfo)
}
return processes, nil
}

+func (cd *containerData) isRoot() bool {
+    return cd.info.Name == "/"
+}
+
+func (cd *containerData) parsePsLine(line, cadvisorContainer string, inHostNamespace bool) (*v2.ProcessInfo, error) {
+    const expectedFields = 13
+    if len(line) == 0 {
+        return nil, nil
+    }
+
+    info := v2.ProcessInfo{}
+    var err error
+
+    fields := strings.Fields(line)
+    if len(fields) < expectedFields {
+        return nil, fmt.Errorf("expected at least %d fields, found %d: output: %q", expectedFields, len(fields), line)
+    }
+    info.User = fields[0]
+    info.StartTime = fields[3]
+    info.Status = fields[8]
+    info.RunningTime = fields[9]
+
+    info.Pid, err = strconv.Atoi(fields[1])
+    if err != nil {
+        return nil, fmt.Errorf("invalid pid %q: %v", fields[1], err)
+    }
+    info.Ppid, err = strconv.Atoi(fields[2])
+    if err != nil {
+        return nil, fmt.Errorf("invalid ppid %q: %v", fields[2], err)
+    }
+
+    percentCPU, err := strconv.ParseFloat(fields[4], 32)
+    if err != nil {
+        return nil, fmt.Errorf("invalid cpu percent %q: %v", fields[4], err)
+    }
+    info.PercentCpu = float32(percentCPU)
+    percentMem, err := strconv.ParseFloat(fields[5], 32)
+    if err != nil {
+        return nil, fmt.Errorf("invalid memory percent %q: %v", fields[5], err)
+    }
+    info.PercentMemory = float32(percentMem)
+
+    info.RSS, err = strconv.ParseUint(fields[6], 0, 64)
+    if err != nil {
+        return nil, fmt.Errorf("invalid rss %q: %v", fields[6], err)
+    }
+    info.VirtualSize, err = strconv.ParseUint(fields[7], 0, 64)
+    if err != nil {
+        return nil, fmt.Errorf("invalid virtual size %q: %v", fields[7], err)
+    }
+    // convert to bytes
+    info.RSS *= 1024
+    info.VirtualSize *= 1024
+
+    // According to `man ps`: The following user-defined format specifiers may contain spaces: args, cmd, comm, command,
+    // fname, ucmd, ucomm, lstart, bsdstart, start.
+    // Therefore we need to be able to parse a comm that consists of multiple space-separated parts.
+    info.Cmd = strings.Join(fields[10:len(fields)-2], " ")
+
+    // These are the last two fields of the line. We create a subslice of `fields` to handle a comm that includes spaces.
+    lastTwoFields := fields[len(fields)-2:]
+    info.Psr, err = strconv.Atoi(lastTwoFields[0])
+    if err != nil {
+        return nil, fmt.Errorf("invalid psr %q: %v", lastTwoFields[0], err)
+    }
+    info.CgroupPath = cd.getCgroupPath(lastTwoFields[1])
+
+    // Remove the ps command we just ran from the cadvisor container.
+    // Not necessary, but makes the cadvisor page look cleaner.
+    if !inHostNamespace && cadvisorContainer == info.CgroupPath && info.Cmd == "ps" {
+        return nil, nil
+    }
+
+    // Do not report processes from other containers when a non-root container is requested.
+    if !cd.isRoot() && info.CgroupPath != cd.info.Name {
+        return nil, nil
+    }
+
+    // Remove cgroup information when a non-root container is requested.
+    if !cd.isRoot() {
+        info.CgroupPath = ""
+    }
+    return &info, nil
+}

 func newContainerData(containerName string, memoryCache *memory.InMemoryCache, handler container.ContainerHandler, logUsage bool, collectorManager collector.CollectorManager, maxHousekeepingInterval time.Duration, allowDynamicHousekeeping bool, clock clock.Clock) (*containerData, error) {
     if memoryCache == nil {
         return nil, fmt.Errorf("nil memory storage")
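
The subtle part of parsePsLine is the comm column: per `man ps` it may contain spaces, so the command is everything between the ten fixed leading columns and the trailing psr and cgroup columns. A standalone sketch with an invented sample line (the real code also validates and converts each column):

    package main

    import (
        "fmt"
        "strings"
    )

    func main() {
        // Hypothetical line in the "user,pid,ppid,stime,pcpu,pmem,rss,vsz,stat,time,comm,psr,cgroup"
        // format; "rs:main Q:Reg" is a comm value that contains a space.
        line := "root 1234 1 08:00 0.5 1.0 2048 4096 S 00:00:01 rs:main Q:Reg 0 4:memory:/docker/abc,"
        fields := strings.Fields(line)

        // fields[0..9] are fixed columns and the last two are psr and cgroup;
        // whatever is left in between is the (possibly space-containing) comm.
        cmd := strings.Join(fields[10:len(fields)-2], " ")
        psr := fields[len(fields)-2]
        cgroup := fields[len(fields)-1]

        fmt.Printf("cmd=%q psr=%s cgroup=%s\n", cmd, psr, cgroup)
        // cmd="rs:main Q:Reg" psr=0 cgroup=4:memory:/docker/abc,
    }
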
Expand Down Expand Up @@ -516,7 +553,7 @@ func (cd *containerData) housekeeping() {
usageCPUNs := uint64(0)
for i := range stats {
if i > 0 {
usageCPUNs += (stats[i].Cpu.Usage.Total - stats[i-1].Cpu.Usage.Total)
usageCPUNs += stats[i].Cpu.Usage.Total - stats[i-1].Cpu.Usage.Total
}
}
usageMemory := stats[numSamples-1].Memory.Usage
Expand Down Expand Up @@ -555,6 +592,8 @@ func (cd *containerData) housekeepingTick(timer <-chan time.Time, longHousekeepi
klog.V(3).Infof("[%s] Housekeeping took %s", cd.info.Name, duration)
}
cd.notifyOnDemand()
cd.lock.Lock()
defer cd.lock.Unlock()
cd.statsLastUpdatedTime = cd.clock.Now()
return true
}