From 412c6f0630fe77ebeff53d775eb31591b194d3f1 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Mon, 30 Nov 2020 12:20:41 -0800 Subject: [PATCH 1/3] libct/system/proc_test: fix, improve, add benchmark 1. Add a test case that tests parentheses in command. 2. Replace individual comparisons with reflect.DeepEqual. This also fixes wrong %-style types in Fatalf statements. 3. Replace Fatalf with Errorf so we don't bail out on the first failure, and do not check result on error. 4. Add two benchmarks. On my laptop, they show: BenchmarkParseStat-4 116415 10804 ns/op BenchmarkParseRealStat-4 240 4781769 ns/op Signed-off-by: Kir Kolyshkin --- libcontainer/system/proc_test.go | 139 ++++++++++++++++++++++++------- 1 file changed, 107 insertions(+), 32 deletions(-) diff --git a/libcontainer/system/proc_test.go b/libcontainer/system/proc_test.go index 7e1acc5bcb4..5e0f6b860fa 100644 --- a/libcontainer/system/proc_test.go +++ b/libcontainer/system/proc_test.go @@ -1,45 +1,120 @@ package system -import "testing" +import ( + "errors" + "math/bits" + "os" + "reflect" + "strconv" + "testing" +) -func TestParseStartTime(t *testing.T) { - data := map[string]Stat_t{ - "4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": { - PID: 4902, - Name: "gunicorn: maste", - State: 'S', - StartTime: 9126532, - }, - "9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": { - PID: 9534, - Name: "cat", - State: 'R', - StartTime: 9214966, - }, +var procdata = map[string]Stat_t{ + "4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": { + PID: 4902, + Name: "gunicorn: maste", + State: 'S', + StartTime: 9126532, + }, + "9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": { + PID: 9534, + Name: "cat", + State: 'R', + StartTime: 9214966, + }, + "12345 ((ugly )pr()cess() R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": { + PID: 12345, + Name: "(ugly )pr()cess(", + State: 'R', + StartTime: 9214966, + }, + "24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": { + PID: 24767, + Name: "irq/44-mei_me", + State: 'S', + StartTime: 8722075, + }, + "0 () I 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0": { + Name: "", + State: 'I', + StartTime: 0, + }, +} - "24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": { - PID: 24767, - Name: "irq/44-mei_me", - State: 'S', - StartTime: 8722075, - }, - } - for line, expected := range data { +func TestParseStat(t *testing.T) { + for line, exp := range procdata { st, err := parseStat(line) if err != nil { - t.Fatal(err) + t.Errorf("input %q, unexpected error %v", line, err) + } else if !reflect.DeepEqual(st, exp) { + t.Errorf("input %q, expected %+v, got %+v", line, exp, st) } - if st.PID != expected.PID { - t.Fatalf("expected PID %q but received %q", expected.PID, st.PID) + } +} + +func BenchmarkParseStat(b *testing.B) { + var ( + st, exp Stat_t + line string + err error + ) + + for i := 0; i != b.N; i++ { + for line, exp = range procdata { + st, err = parseStat(line) } - if st.State != expected.State { - t.Fatalf("expected state %q but received %q", expected.State, st.State) + } + if err != nil { + b.Fatal(err) + } + if !reflect.DeepEqual(st, exp) { + b.Fatal("wrong result") + } +} + +func BenchmarkParseRealStat(b *testing.B) { + var ( + st Stat_t + err error + total int + ) + b.StopTimer() + fd, err := os.Open("/proc") + if err != nil { + b.Fatal(err) + } + defer fd.Close() + + for i := 0; i != b.N; i++ { + count := 0 + if _, err := fd.Seek(0, 0); err != nil { + b.Fatal(err) } - if st.Name != expected.Name { - t.Fatalf("expected name %q but received %q", expected.Name, st.Name) + names, err := fd.Readdirnames(-1) + if err != nil { + b.Fatal(err) } - if st.StartTime != expected.StartTime { - t.Fatalf("expected start time %q but received %q", expected.StartTime, st.StartTime) + for _, n := range names { + pid, err := strconv.ParseUint(n, 10, bits.UintSize) + if err != nil { + continue + } + b.StartTimer() + st, err = Stat(int(pid)) + b.StopTimer() + if err != nil { + // Ignore a process that just finished. + if errors.Is(err, os.ErrNotExist) { + continue + } + b.Fatal(err) + } + if st.PID != uint(pid) { + b.Fatalf("pid mismatch, expected %d, got %d", pid, st.PID) + } + count++ } + total += count } + b.Logf("N: %d, parsed %d pids, last stat: %+v, err: %v", b.N, total, st, err) } From f90008aec8607345aa243ac41977f452ee690771 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Mon, 30 Nov 2020 12:26:20 -0800 Subject: [PATCH 2/3] libct/system.Stat: fix/improve/speedup 1. Remove PID field as it is useless. 2. Rewrite parseStat() to make it faster and more correct: - do not use fmt.Scanf as it is very slow; - avoid splitting data into 20+ fields, of which we only need 2; - make sure to not panic on short lines and other bad input; - add some bad input tests (some fail with old code); - use LastIndexByte instead of LastIndex. Benchmarks: before (from the previous commit message): > BenchmarkParseStat-4 116415 10804 ns/op > BenchmarkParseRealStat-4 240 4781769 ns/op after: > BenchmarkParseStat-4 1164948 1068 ns/op > BenchmarkParseRealStat-4 331 3458315 ns/op We are seeing 10x speedup in a synthetic benchmark, and about 1.4x speedup in a real world benchmark. While at it, do not ignore any possible errors, and properly wrap those. [v2: use pkg/errors more, remove t.Logf from test] [v3: rebased; drop pkg/errors; gofumpt'ed] [v4: rebased; improved description] [v5: rebased; mention bad input tests, added second benchmark results] [v6: remove PID field, do not use strings.Split, further speedup] Signed-off-by: Kir Kolyshkin --- libcontainer/system/proc.go | 62 ++++++++++++++++---------- libcontainer/system/proc_test.go | 74 +++++++++++++++++++++++++++++--- 2 files changed, 107 insertions(+), 29 deletions(-) diff --git a/libcontainer/system/proc.go b/libcontainer/system/proc.go index d0407cfe42a..ef9ebcb063e 100644 --- a/libcontainer/system/proc.go +++ b/libcontainer/system/proc.go @@ -48,9 +48,6 @@ func (s State) String() string { // described in proc(5) with names based on the /proc/[pid]/status // fields. type Stat_t struct { - // PID is the process ID. - PID uint - // Name is the command run by the process. Name string @@ -72,32 +69,53 @@ func Stat(pid int) (stat Stat_t, err error) { } func parseStat(data string) (stat Stat_t, err error) { - // From proc(5), field 2 could contain space and is inside `(` and `)`. - // The following is an example: + // Example: // 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0 - i := strings.LastIndex(data, ")") - if i <= 2 || i >= len(data)-1 { - return stat, fmt.Errorf("invalid stat data: %q", data) + // The fields are space-separated, see full description in proc(5). + // + // We are only interested in: + // * field 2: process name. It is the only field enclosed into + // parenthesis, as it can contain spaces (and parenthesis) inside. + // * field 3: process state, a single character (%c) + // * field 22: process start time, a long unsigned integer (%llu). + + // 1. Look for the first '(' and the last ')' first, what's in between is Name. + // We expect at least 20 fields and a space after the last one. + + const minAfterName = 20*2 + 1 // the min field is '0 '. + + first := strings.IndexByte(data, '(') + if first < 0 || first+minAfterName >= len(data) { + return stat, fmt.Errorf("invalid stat data (no comm or too short): %q", data) } - parts := strings.SplitN(data[:i], "(", 2) - if len(parts) != 2 { - return stat, fmt.Errorf("invalid stat data: %q", data) + last := strings.LastIndexByte(data, ')') + if last <= first || last+minAfterName >= len(data) { + return stat, fmt.Errorf("invalid stat data (no comm or too short): %q", data) } - stat.Name = parts[1] - _, err = fmt.Sscanf(parts[0], "%d", &stat.PID) + stat.Name = data[first+1 : last] + + // 2. Remove fields 1 and 2 and a space after. State is right after. + data = data[last+2:] + stat.State = State(data[0]) + + // 3. StartTime is field 22, data is at field 3 now, so we need to skip 19 spaces. + skipSpaces := 22 - 3 + for first = 0; skipSpaces > 0 && first < len(data); first++ { + if data[first] == ' ' { + skipSpaces-- + } + } + // Now first points to StartTime; look for space right after. + i := strings.IndexByte(data[first:], ' ') + if i < 0 { + return stat, fmt.Errorf("invalid stat data (too short): %q", data) + } + stat.StartTime, err = strconv.ParseUint(data[first:first+i], 10, 64) if err != nil { - return stat, err + return stat, fmt.Errorf("invalid stat data (bad start time): %w", err) } - // parts indexes should be offset by 3 from the field number given - // proc(5), because parts is zero-indexed and we've removed fields - // one (PID) and two (Name) in the paren-split. - parts = strings.Split(data[i+2:], " ") - var state int - fmt.Sscanf(parts[3-3], "%c", &state) //nolint:staticcheck // "3-3" is more readable in this context. - stat.State = State(state) - fmt.Sscanf(parts[22-3], "%d", &stat.StartTime) return stat, nil } diff --git a/libcontainer/system/proc_test.go b/libcontainer/system/proc_test.go index 5e0f6b860fa..949d8403ea9 100644 --- a/libcontainer/system/proc_test.go +++ b/libcontainer/system/proc_test.go @@ -11,25 +11,21 @@ import ( var procdata = map[string]Stat_t{ "4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": { - PID: 4902, Name: "gunicorn: maste", State: 'S', StartTime: 9126532, }, "9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": { - PID: 9534, Name: "cat", State: 'R', StartTime: 9214966, }, "12345 ((ugly )pr()cess() R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": { - PID: 12345, Name: "(ugly )pr()cess(", State: 'R', StartTime: 9214966, }, "24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": { - PID: 24767, Name: "irq/44-mei_me", State: 'S', StartTime: 8722075, @@ -39,6 +35,12 @@ var procdata = map[string]Stat_t{ State: 'I', StartTime: 0, }, + // Not entirely correct, but minimally viable input (StartTime and a space after). + "1 (woo hoo) S 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 ": { + Name: "woo hoo", + State: 'S', + StartTime: 4, + }, } func TestParseStat(t *testing.T) { @@ -52,6 +54,67 @@ func TestParseStat(t *testing.T) { } } +func TestParseStatBadInput(t *testing.T) { + cases := []struct { + desc, input string + }{ + { + "no (", + "123 ) S 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0", + }, + { + "no )", + "123 ( S 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0", + }, + { + ") at end", + "123 (cmd) S 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0)", + }, + { + "misplaced ()", + "123 )one( S 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0", + }, + { + "misplaced empty ()", + "123 )( S 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0", + }, + { + "empty line", + "", + }, + { + "short line", + "123 (cmd) S 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0", + }, + { + "short line (no space after stime)", + "123 (cmd) S 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42", + }, + { + "bad stime", + "123 (cmd) S 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 ", + }, + { + "bad stime 2", // would be valid if not -1 + "123 (cmd) S -1 ", + }, + { + "a tad short", + "1234 (cmd) ", + }, + { + "bad stime", + "123 (cmd) S 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1", + }, + } + for _, c := range cases { + st, err := parseStat(c.input) + if err == nil { + t.Errorf("case %q, expected error, got nil, %+v", c.desc, st) + } + } +} + func BenchmarkParseStat(b *testing.B) { var ( st, exp Stat_t @@ -109,9 +172,6 @@ func BenchmarkParseRealStat(b *testing.B) { } b.Fatal(err) } - if st.PID != uint(pid) { - b.Fatalf("pid mismatch, expected %d, got %d", pid, st.PID) - } count++ } total += count From a37a89f4dbbb9bf2e30a39cf37c654852f755219 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 3 Aug 2021 19:28:48 -0700 Subject: [PATCH 3/3] libct/system: add I and P process states Those states are available since Linux 4.14 (kernel commits 8ef9925b02c23e3838d5 and 06eb61844d841d003). Before this patch, they were shown as unknown. This is mostly cosmetical. Note that I is described in /proc/pid/status as just "idle", although elsewhere it says it's an idle kernel thread. Let's have it as "idle" for brevity. Signed-off-by: Kir Kolyshkin --- libcontainer/system/proc.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libcontainer/system/proc.go b/libcontainer/system/proc.go index ef9ebcb063e..8f0a68afd12 100644 --- a/libcontainer/system/proc.go +++ b/libcontainer/system/proc.go @@ -19,6 +19,8 @@ const ( // Only values for Linux 3.14 and later are listed here Stopped State = 'T' TracingStop State = 't' Zombie State = 'Z' + Parked State = 'P' + Idle State = 'I' ) // String forms of the state from proc(5)'s documentation for @@ -39,6 +41,10 @@ func (s State) String() string { return "tracing stop" case Zombie: return "zombie" + case Parked: + return "parked" + case Idle: + return "idle" // kernel thread default: return fmt.Sprintf("unknown (%c)", s) }