Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Carry 3962] Support process.scheduler #4025

Merged
merged 1 commit into from
Oct 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion docs/spec-conformance.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ v1.0.0 | `SCMP_ARCH_PARISC64` | Unplanned, due to lack
v1.0.2 | `.linux.personality` | [#3126](https://github.com/opencontainers/runc/pull/3126)
v1.1.0 | `SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV` | [#3862](https://github.com/opencontainers/runc/pull/3862)
v1.1.0 | rsvd hugetlb cgroup | TODO ([#3859](https://github.com/opencontainers/runc/issues/3859))
v1.1.0 | `.process.scheduler` | TODO ([#3895](https://github.com/opencontainers/runc/issues/3895))
v1.1.0 | `.process.ioPriority` | [#3783](https://github.com/opencontainers/runc/pull/3783)


Expand Down
63 changes: 63 additions & 0 deletions libcontainer/configs/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"time"

"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"

"github.com/opencontainers/runc/libcontainer/devices"
"github.com/opencontainers/runtime-spec/specs-go"
Expand Down Expand Up @@ -219,6 +220,68 @@ type Config struct {

// TimeOffsets specifies the offset for supporting time namespaces.
TimeOffsets map[string]specs.LinuxTimeOffset `json:"time_offsets,omitempty"`

// Scheduler represents the scheduling attributes for a process.
Scheduler *Scheduler `json:"scheduler,omitempty"`
}

// Scheduler holds the scheduling attributes for a process. It is an alias of
// the runtime-spec type specs.Scheduler, which is based on the Linux
// sched_setattr(2) syscall.
type Scheduler = specs.Scheduler

// ToSchedAttr converts a *configs.Scheduler into the *unix.SchedAttr form
// that is passed to sched_setattr(2).
//
// It returns an error if scheduler is nil, or if it names a policy or flag
// that is not one of the specs.Sched* / specs.SchedFlag* values handled
// below. No range checks are performed on the numeric fields here.
func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) {
	// Guard against a nil argument so that callers get an error rather
	// than a nil-pointer panic.
	if scheduler == nil {
		return nil, fmt.Errorf("scheduler must not be nil")
	}

	// Translate the policy name into its numeric value. The numbers mirror
	// the SCHED_* policy constants from the Linux uapi <linux/sched.h>.
	var policy uint32
	switch scheduler.Policy {
	case specs.SchedOther:
		policy = 0
	case specs.SchedFIFO:
		policy = 1
	case specs.SchedRR:
		policy = 2
	case specs.SchedBatch:
		policy = 3
	case specs.SchedISO:
		policy = 4
	case specs.SchedIdle:
		policy = 5
	case specs.SchedDeadline:
		policy = 6
	default:
		return nil, fmt.Errorf("invalid scheduler policy: %s", scheduler.Policy)
	}

	// OR together the bit for every requested flag. The bits mirror the
	// SCHED_FLAG_* constants from the Linux uapi <linux/sched.h>.
	var flags uint64
	for _, flag := range scheduler.Flags {
		switch flag {
		case specs.SchedFlagResetOnFork:
			flags |= 0x01
		case specs.SchedFlagReclaim:
			flags |= 0x02
		case specs.SchedFlagDLOverrun:
			flags |= 0x04
		case specs.SchedFlagKeepPolicy:
			flags |= 0x08
		case specs.SchedFlagKeepParams:
			flags |= 0x10
		case specs.SchedFlagUtilClampMin:
			flags |= 0x20
		case specs.SchedFlagUtilClampMax:
			flags |= 0x40
		default:
			return nil, fmt.Errorf("invalid scheduler flag: %s", flag)
		}
	}

	return &unix.SchedAttr{
		Size:   unix.SizeofSchedAttr,
		Policy: policy,
		Flags:  flags,
		Nice:   scheduler.Nice,
		// Priority is signed in the spec type but unsigned in SchedAttr;
		// a negative value would wrap around in this conversion.
		Priority: uint32(scheduler.Priority),
		Runtime:  scheduler.Runtime,
		Deadline: scheduler.Deadline,
		Period:   scheduler.Period,
	}, nil
}

type (
Expand Down
23 changes: 23 additions & 0 deletions libcontainer/configs/validate/validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runtime-spec/specs-go"
selinux "github.com/opencontainers/selinux/go-selinux"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
Expand All @@ -30,6 +31,7 @@ func Validate(config *configs.Config) error {
intelrdtCheck,
rootlessEUIDCheck,
mountsStrict,
scheduler,
}
for _, c := range checks {
if err := c(config); err != nil {
Expand Down Expand Up @@ -353,3 +355,24 @@ func isHostNetNS(path string) (bool, error) {

return (st1.Dev == st2.Dev) && (st1.Ino == st2.Ino), nil
}

// scheduler validates the optional scheduler section of config, following the
// rules described in
// https://man7.org/linux/man-pages/man2/sched_setattr.2.html
//
// A nil scheduler is valid. Otherwise the checks run in order and the first
// failure is returned: a policy must be set, nice must lie in [-20, 19],
// a non-zero priority requires SchedFIFO or SchedRR, and non-zero
// runtime/deadline/period require SchedDeadline.
func scheduler(config *configs.Config) error {
	sched := config.Scheduler
	if sched == nil {
		// Nothing configured, so nothing to validate.
		return nil
	}

	fifoOrRR := sched.Policy == specs.SchedFIFO || sched.Policy == specs.SchedRR
	hasDeadlineParams := sched.Runtime != 0 || sched.Deadline != 0 || sched.Period != 0

	switch {
	case sched.Policy == "":
		return errors.New("scheduler policy is required")
	case sched.Nice < -20 || sched.Nice > 19:
		return fmt.Errorf("invalid scheduler.nice: %d", sched.Nice)
	case sched.Priority != 0 && !fifoOrRR:
		return errors.New("scheduler.priority can only be specified for SchedFIFO or SchedRR policy")
	case sched.Policy != specs.SchedDeadline && hasDeadlineParams:
		return errors.New("scheduler runtime/deadline/period can only be specified for SchedDeadline policy")
	}
	return nil
}
50 changes: 50 additions & 0 deletions libcontainer/configs/validate/validator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -616,3 +616,53 @@ func TestValidateIDMapMounts(t *testing.T) {
})
}
}

// TestValidateScheduler runs Validate against a table of scheduler settings
// and checks, for each entry, whether validation is expected to fail.
func TestValidateScheduler(t *testing.T) {
	testCases := []struct {
		isErr     bool
		policy    string
		niceValue int32
		priority  int32
		runtime   uint64
		deadline  uint64
		period    uint64
	}{
		{isErr: true, niceValue: 0},
		{isErr: false, policy: "SCHED_OTHER", niceValue: 19},
		{isErr: false, policy: "SCHED_OTHER", niceValue: -20},
		{isErr: true, policy: "SCHED_OTHER", niceValue: 20},
		{isErr: true, policy: "SCHED_OTHER", niceValue: -21},
		{isErr: true, policy: "SCHED_OTHER", priority: 100},
		{isErr: false, policy: "SCHED_FIFO", priority: 100},
		{isErr: true, policy: "SCHED_FIFO", runtime: 20},
		{isErr: true, policy: "SCHED_BATCH", deadline: 30},
		{isErr: true, policy: "SCHED_IDLE", period: 40},
		{isErr: true, policy: "SCHED_DEADLINE", priority: 100},
		{isErr: false, policy: "SCHED_DEADLINE", runtime: 200},
		{isErr: false, policy: "SCHED_DEADLINE", deadline: 300},
		{isErr: false, policy: "SCHED_DEADLINE", period: 400},
	}

	for i, tc := range testCases {
		config := &configs.Config{
			Rootfs: "/var",
			Scheduler: &configs.Scheduler{
				Policy:   specs.LinuxSchedulerPolicy(tc.policy),
				Nice:     tc.niceValue,
				Priority: tc.priority,
				Runtime:  tc.runtime,
				Deadline: tc.deadline,
				Period:   tc.period,
			},
		}

		err := Validate(config)
		// Report the index and the whole case: most entries share the
		// same nice value, so printing nice alone cannot identify which
		// entry failed.
		if tc.isErr && err == nil {
			t.Errorf("case %d (%+v): expected error, got nil", i, tc)
		}
		if !tc.isErr && err != nil {
			t.Errorf("case %d (%+v): expected nil, got error %v", i, tc, err)
		}
	}
}
14 changes: 14 additions & 0 deletions libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,20 @@ func setupRlimits(limits []configs.Rlimit, pid int) error {
return nil
}

// setupScheduler applies the scheduling attributes in config.Scheduler by
// calling sched_setattr(2) with pid 0 (per the man page, the calling thread).
//
// It is a no-op when no scheduler is configured. An EPERM failure while a
// cpuset CPU list is configured is reported as a conflict between the two
// settings; any other failure is wrapped and returned.
func setupScheduler(config *configs.Config) error {
	// Nothing to apply; this also keeps ToSchedAttr from being handed a
	// nil scheduler if a caller forgets to check.
	if config.Scheduler == nil {
		return nil
	}

	attr, err := configs.ToSchedAttr(config.Scheduler)
	if err != nil {
		return err
	}
	if err := unix.SchedSetAttr(0, attr, 0); err != nil {
		if errors.Is(err, unix.EPERM) && config.Cgroups.CpusetCpus != "" {
			return errors.New("process scheduler can't be used together with AllowedCPUs")
		}
		return fmt.Errorf("error setting scheduler: %w", err)
	}
	return nil
}

// signalAllProcesses freezes then iterates over all the processes inside the
// manager's cgroups sending the signal s to them.
func signalAllProcesses(m cgroups.Manager, s unix.Signal) error {
Expand Down
2 changes: 2 additions & 0 deletions libcontainer/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ type Process struct {
//
// For cgroup v2, the only key allowed is "".
SubCgroupPaths map[string]string

Scheduler *configs.Scheduler
}

// Wait waits for the process to exit.
Expand Down
6 changes: 6 additions & 0 deletions libcontainer/setns_init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ func (l *linuxSetnsInit) Init() error {
unix.Umask(int(*l.config.Config.Umask))
}

if l.config.Config.Scheduler != nil {
if err := setupScheduler(l.config.Config); err != nil {
return err
}
}

if err := selinux.SetExecLabel(l.config.ProcessLabel); err != nil {
return err
}
Expand Down
4 changes: 4 additions & 0 deletions libcontainer/specconv/spec_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,10 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
Ambient: spec.Process.Capabilities.Ambient,
}
}
if spec.Process.Scheduler != nil {
s := *spec.Process.Scheduler
config.Scheduler = &s
}
}
createHooks(spec, config)
config.Version = specs.Version
Expand Down
7 changes: 7 additions & 0 deletions libcontainer/standard_init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,13 @@ func (l *linuxStandardInit) Init() error {
return &os.SyscallError{Syscall: "prctl(SET_NO_NEW_PRIVS)", Err: err}
}
}

if l.config.Config.Scheduler != nil {
if err := setupScheduler(l.config.Config); err != nil {
return err
}
}

// Tell our parent that we're ready to Execv. This must be done before the
// Seccomp rules have been applied, because we need to be able to read and
// write to a socket.
Expand Down
34 changes: 34 additions & 0 deletions tests/integration/scheduler.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env bats

load helpers

# Bats per-test setup hook: calls `requires root` and then `setup_debian`,
# both of which come from the loaded helpers.
function setup() {
requires root
setup_debian
}

# Bats per-test teardown hook: delegates to the teardown_bundle helper.
function teardown() {
teardown_bundle
}

# Starts a container whose config requests SCHED_DEADLINE, then runs
# `chrt -p 1` inside it and checks the reported policy, priority and
# runtime/deadline/period values against the ones that were configured.
@test "scheduler is applied" {
# Set .process.scheduler in the bundle config.
update_config ' .process.scheduler = {"policy": "SCHED_DEADLINE", "nice": 19, "priority": 0, "runtime": 42000, "deadline": 1000000, "period": 1000000, }'

# The container must start successfully with the scheduler settings.
runc run -d --console-socket "$CONSOLE_SOCKET" test_scheduler
[ "$status" -eq 0 ]

# Query the scheduling attributes of PID 1 from inside the container.
runc exec test_scheduler chrt -p 1
[ "$status" -eq 0 ]

# Each output line must end with the expected value.
[[ "${lines[0]}" == *"scheduling policy: SCHED_DEADLINE" ]]
[[ "${lines[1]}" == *"priority: 0" ]]
[[ "${lines[2]}" == *"runtime/deadline/period parameters: 42000/1000000/1000000" ]]
}

# Configures both a cpuset CPU list and a SCHED_DEADLINE scheduler, and
# expects `runc run` to fail with exit status 1.
@test "scheduler vs cpus" {
update_config ' .linux.resources.cpu.cpus = "0"
| .process.scheduler = {"policy": "SCHED_DEADLINE", "nice": 19, "runtime": 42000, "deadline": 1000000, "period": 1000000, }'

# Starting the container is expected to be rejected.
runc run -d --console-socket "$CONSOLE_SOCKET" test_scheduler
[ "$status" -eq 1 ]
}
5 changes: 5 additions & 0 deletions utils_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) {
lp.ConsoleHeight = uint16(p.ConsoleSize.Height)
}

if p.Scheduler != nil {
s := *p.Scheduler
lp.Scheduler = &s
}

if p.Capabilities != nil {
lp.Capabilities = &configs.Capabilities{}
lp.Capabilities.Bounding = p.Capabilities.Bounding
Expand Down