Skip to content

Commit

Permalink
libct/system.Stat: fix/improve/speedup
Browse files Browse the repository at this point in the history
Rewrite parseStat() to make it faster and more correct:

 - do not use fmt.Scanf as it is very slow;
 - avoid splitting data into 20+ fields, of which we only need 2;
 - make sure to not panic on short lines and other bad input;
 - add some bad input tests (some fail with old code);
 - use LastIndexByte instead of LastIndex.

Benchmarks:

before (from the previous commit message):

> BenchmarkParseStat-4            146722              8223 ns/op
> BenchmarkParseRealStat-4           231           4726374 ns/op

after:

> BenchmarkParseStat-4       	  942146	      1137 ns/op
> BenchmarkParseRealStat-4   	     331	   3463526 ns/op

We are seeing about 7x speedup in a synthetic benchmark, and about 1.5x
speedup in a real world benchmark.

While at it, do not ignore any possible errors, and properly wrap those.

[v2: use pkg/errors more, remove t.Logf from test]
[v3: rebased; drop pkg/errors; gofumpt'ed]
[v4: rebased; improved description]
[v5: rebased; mention bad input tests, added second benchmark results]

Signed-off-by: Kir Kolyshkin <[email protected]>
  • Loading branch information
kolyshkin committed Jul 12, 2021
1 parent 02809a3 commit 0eda452
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 14 deletions.
44 changes: 30 additions & 14 deletions libcontainer/system/proc.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package system
import (
"fmt"
"io/ioutil"
"math/bits"
"path/filepath"
"strconv"
"strings"
Expand Down Expand Up @@ -75,29 +76,44 @@ func parseStat(data string) (stat Stat_t, err error) {
// From proc(5), field 2 could contain space and is inside `(` and `)`.
// The following is an example:
// 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0
i := strings.LastIndex(data, ")")
// Field 2 can also contain parenthesis, so be sure to look for
// the first '(' and the last ')'.
i := strings.LastIndexByte(data, ')')
if i <= 2 || i >= len(data)-1 {
return stat, fmt.Errorf("invalid stat data: %q", data)
return stat, fmt.Errorf("invalid stat data (no comm): %q", data)
}

parts := strings.SplitN(data[:i], "(", 2)
parts := strings.SplitN(data[:i], " (", 2)
if len(parts) != 2 {
return stat, fmt.Errorf("invalid stat data: %q", data)
return stat, fmt.Errorf("invalid stat data (no comm): %q", data)
}

stat.Name = parts[1]
_, err = fmt.Sscanf(parts[0], "%d", &stat.PID)
pid, err := strconv.ParseUint(parts[0], 10, bits.UintSize)
if err != nil {
return stat, err
return stat, fmt.Errorf("invalid stat data (bad pid): %w", err)
}
stat.PID = uint(pid)

// Remove the first two fields and a space after.
data = data[i+2:]
stat.State = State(data[0])

// StartTime is field 22, data is at field 3 now, so we need to skip 19 spaces.
skipSpaces := 22 - 3
for i = 0; skipSpaces > 0 && i < len(data); i++ {
if data[i] == ' ' {
skipSpaces--
}
}
j := strings.IndexByte(data[i:], ' ')
if j <= 0 {
return stat, fmt.Errorf("invalid stat data (too short): %q", data)
}
stat.StartTime, err = strconv.ParseUint(data[i:i+j], 10, 64)
if err != nil {
return stat, fmt.Errorf("invalid stat data (bad start time): %w", err)
}

// parts indexes should be offset by 3 from the field number given
// proc(5), because parts is zero-indexed and we've removed fields
// one (PID) and two (Name) in the paren-split.
parts = strings.Split(data[i+2:], " ")
var state int
fmt.Sscanf(parts[3-3], "%c", &state) //nolint:staticcheck // "3-3" is more readable in this context.
stat.State = State(state)
fmt.Sscanf(parts[22-3], "%d", &stat.StartTime)
return stat, nil
}
45 changes: 45 additions & 0 deletions libcontainer/system/proc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,51 @@ func TestParseStat(t *testing.T) {
}
}

func TestParseStatBadInput(t *testing.T) {
cases := []struct {
desc, input string
}{
{
"no (",
"123 ) S 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
},
{
"no )",
"123 ( S 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
},
{
"negative pid",
"-1 (cmd) S 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
},
{
"empty line",
"",
},
{
"short line",
"123 (cmd) S 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
},
{
"short line (no space after stime)",
"123 (cmd) S 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42",
},
{
"bad stime",
"123 (cmd) S 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 ",
},
{
"bad stime 2", // would be valid if not -1
"123 (cmd) S -1 ",
},
}
for _, c := range cases {
_, err := parseStat(c.input)
if err == nil {
t.Errorf("case %q, expected error, got nil", c.desc)
}
}
}

func BenchmarkParseStat(b *testing.B) {
var (
st, exp Stat_t
Expand Down

0 comments on commit 0eda452

Please sign in to comment.