Skip to content

Commit

Permalink
libct/system.Stat: fix/improve/speedup
Browse files Browse the repository at this point in the history
1. Remove PID field as it is useless.

2. Rewrite parseStat() to make it faster and more correct:

 - do not use fmt.Scanf as it is very slow;
 - avoid splitting data into 20+ fields, of which we only need 2;
 - make sure to not panic on short lines and other bad input;
 - add some bad input tests (some fail with old code);
 - use LastIndexByte instead of LastIndex.

Benchmarks:

before (from the previous commit message):

> BenchmarkParseStat-4            123442              8256 ns/op
> BenchmarkParseRealStat-4           334           3461994 ns/op

after:

> BenchmarkParseStat-4       	 1358124	       874.0 ns/op
> BenchmarkParseRealStat-4   	     451	   2510334 ns/op

We are seeing about 9x speedup in a synthetic benchmark, and about 1.5x
speedup in a real world benchmark.

While at it, do not ignore any possible errors, and properly wrap those.

[v2: use pkg/errors more, remove t.Logf from test]
[v3: rebased; drop pkg/errors; gofumpt'ed]
[v4: rebased; improved description]
[v5: rebased; mention bad input tests, added second benchmark results]
[v6: remove PID field]

Signed-off-by: Kir Kolyshkin <[email protected]>
  • Loading branch information
kolyshkin committed Aug 4, 2021
1 parent 32d1f35 commit f4716d3
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 24 deletions.
41 changes: 24 additions & 17 deletions libcontainer/system/proc.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,6 @@ func (s State) String() string {
// described in proc(5) with names based on the /proc/[pid]/status
// fields.
type Stat_t struct {
// PID is the process ID.
PID uint

// Name is the command run by the process.
Name string

Expand All @@ -75,29 +72,39 @@ func parseStat(data string) (stat Stat_t, err error) {
// From proc(5), field 2 could contain space and is inside `(` and `)`.
// The following is an example:
// 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0
i := strings.LastIndex(data, ")")
// Field 2 can also contain parenthesis, so be sure to look for
// the first '(' and the last ')'.
i := strings.LastIndexByte(data, ')')
if i <= 2 || i >= len(data)-1 {
return stat, fmt.Errorf("invalid stat data: %q", data)
return stat, fmt.Errorf("invalid stat data (no comm): %q", data)
}

parts := strings.SplitN(data[:i], "(", 2)
parts := strings.SplitN(data[:i], " (", 2)
if len(parts) != 2 {
return stat, fmt.Errorf("invalid stat data: %q", data)
return stat, fmt.Errorf("invalid stat data (no comm): %q", data)
}

stat.Name = parts[1]
_, err = fmt.Sscanf(parts[0], "%d", &stat.PID)

// Remove the first two fields and a space after.
data = data[i+2:]
stat.State = State(data[0])

// StartTime is field 22, data is at field 3 now, so we need to skip 19 spaces.
skipSpaces := 22 - 3
for i = 0; skipSpaces > 0 && i < len(data); i++ {
if data[i] == ' ' {
skipSpaces--
}
}
j := strings.IndexByte(data[i:], ' ')
if j <= 0 {
return stat, fmt.Errorf("invalid stat data (too short): %q", data)
}
stat.StartTime, err = strconv.ParseUint(data[i:i+j], 10, 64)
if err != nil {
return stat, err
return stat, fmt.Errorf("invalid stat data (bad start time): %w", err)
}

// parts indexes should be offset by 3 from the field number given
// proc(5), because parts is zero-indexed and we've removed fields
// one (PID) and two (Name) in the paren-split.
parts = strings.Split(data[i+2:], " ")
var state int
fmt.Sscanf(parts[3-3], "%c", &state) //nolint:staticcheck // "3-3" is more readable in this context.
stat.State = State(state)
fmt.Sscanf(parts[22-3], "%d", &stat.StartTime)
return stat, nil
}
48 changes: 41 additions & 7 deletions libcontainer/system/proc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,21 @@ import (

var procdata = map[string]Stat_t{
"4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": {
PID: 4902,
Name: "gunicorn: maste",
State: 'S',
StartTime: 9126532,
},
"9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": {
PID: 9534,
Name: "cat",
State: 'R',
StartTime: 9214966,
},
"12345 ((ugly )pr()cess() R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": {
PID: 12345,
Name: "(ugly )pr()cess(",
State: 'R',
StartTime: 9214966,
},
"24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": {
PID: 24767,
Name: "irq/44-mei_me",
State: 'S',
StartTime: 8722075,
Expand All @@ -47,6 +43,47 @@ func TestParseStat(t *testing.T) {
}
}

func TestParseStatBadInput(t *testing.T) {
cases := []struct {
desc, input string
}{
{
"no (",
"123 ) S 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
},
{
"no )",
"123 ( S 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
},
{
"empty line",
"",
},
{
"short line",
"123 (cmd) S 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
},
{
"short line (no space after stime)",
"123 (cmd) S 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42",
},
{
"bad stime",
"123 (cmd) S 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 ",
},
{
"bad stime 2", // would be valid if not -1
"123 (cmd) S -1 ",
},
}
for _, c := range cases {
_, err := parseStat(c.input)
if err == nil {
t.Errorf("case %q, expected error, got nil", c.desc)
}
}
}

func BenchmarkParseStat(b *testing.B) {
var (
st, exp Stat_t
Expand Down Expand Up @@ -104,9 +141,6 @@ func BenchmarkParseRealStat(b *testing.B) {
}
b.Fatal(err)
}
if st.PID != uint(pid) {
b.Fatalf("pid mismatch, expected %d, got %d", pid, st.PID)
}
count++
}
total += count
Expand Down

0 comments on commit f4716d3

Please sign in to comment.