From 0e8ada110fa0d936c512af9e0821b624489fe8f8 Mon Sep 17 00:00:00 2001 From: Jernej Kos Date: Fri, 22 Apr 2022 12:55:39 +0200 Subject: [PATCH 1/3] go/worker/compute: Only advertise active version for TEE runtimes Previously this caused additional downtime on upgrades due to capability updates not being allowed. --- .changelog/4683.bugfix.md | 4 ++++ go/worker/compute/executor/committee/node.go | 11 ++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 .changelog/4683.bugfix.md diff --git a/.changelog/4683.bugfix.md b/.changelog/4683.bugfix.md new file mode 100644 index 00000000000..473d176f7ea --- /dev/null +++ b/.changelog/4683.bugfix.md @@ -0,0 +1,4 @@ +go/worker/compute: Only advertise active version for TEE runtimes + +Previously this caused additional downtime on upgrades due to capability +updates not being allowed. diff --git a/go/worker/compute/executor/committee/node.go b/go/worker/compute/executor/committee/node.go index 4ebccac166d..3944538a68c 100644 --- a/go/worker/compute/executor/committee/node.go +++ b/go/worker/compute/executor/committee/node.go @@ -1532,10 +1532,15 @@ func (n *Node) nudgeAvailability(force bool) { n.roleProvider.SetAvailable(func(nd *node.Node) error { for _, version := range n.commonNode.Runtime.HostVersions() { - rt := nd.AddOrUpdateRuntime(n.commonNode.Runtime.ID(), version) - if rt.Version == n.runtimeVersion { - rt.Capabilities.TEE = n.runtimeCapabilityTEE + // For TEE-enabled runtimes we can only advertise the active version as this will + // otherwise cause additional downtime on upgrades due to capability updates not + // being allowed. + if n.runtimeCapabilityTEE != nil && version != n.runtimeVersion { + continue } + + rt := nd.AddOrUpdateRuntime(n.commonNode.Runtime.ID(), version) + rt.Capabilities.TEE = n.runtimeCapabilityTEE } return nil }) From 654d5d9085517736f48ea98803f356dc2ed289c2 Mon Sep 17 00:00:00 2001 From: Jernej Kos Date: Fri, 22 Apr 2022 13:17:40 +0200 Subject: [PATCH 2/3] go/worker/compute: Do not advertise old versions --- go/worker/compute/executor/committee/node.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/go/worker/compute/executor/committee/node.go b/go/worker/compute/executor/committee/node.go index 3944538a68c..f529b8666a7 100644 --- a/go/worker/compute/executor/committee/node.go +++ b/go/worker/compute/executor/committee/node.go @@ -1539,6 +1539,11 @@ func (n *Node) nudgeAvailability(force bool) { continue } + // Skip sending any old versions that will never be active again. + if version.ToU64() < n.runtimeVersion.ToU64() { + continue + } + rt := nd.AddOrUpdateRuntime(n.commonNode.Runtime.ID(), version) rt.Capabilities.TEE = n.runtimeCapabilityTEE } From 0131668d2740af1ce8c0fe785a66b9e7ae8d469d Mon Sep 17 00:00:00 2001 From: Jernej Kos Date: Sat, 23 Apr 2022 19:05:35 +0200 Subject: [PATCH 3/3] go/runtime/host/sandbox: Properly handle clone3 in seccomp policy We need to handle the clone3 syscall in a special manner as there are several complications to its handling: - Newer glibc versions will try clone3 first and if they see EPERM they will instantly fail making the program unable to spawn threads. - The clone3 syscall is much more complex than clone and so we can't simply inspect its flags as we do for clone. Therefore we need to reject the syscall with ENOSYS, causing fallback to clone. --- .changelog/4687.bugfix.md | 13 +++++++++++++ .../host/sandbox/process/seccomp_linux.go | 19 +++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 .changelog/4687.bugfix.md diff --git a/.changelog/4687.bugfix.md b/.changelog/4687.bugfix.md new file mode 100644 index 00000000000..101874fd8e5 --- /dev/null +++ b/.changelog/4687.bugfix.md @@ -0,0 +1,13 @@ +go/runtime/host/sandbox: Properly handle clone3 in seccomp policy + +We need to handle the clone3 syscall in a special manner as there are +several complications to its handling: + +- Newer glibc versions will try clone3 first and if they see EPERM they + will instantly fail making the program unable to spawn threads. + +- The clone3 syscall is much more complex than clone and so we can't + simply inspect its flags as we do for clone. + +Therefore we need to reject the syscall with ENOSYS, causing fallback to +clone. diff --git a/go/runtime/host/sandbox/process/seccomp_linux.go b/go/runtime/host/sandbox/process/seccomp_linux.go index ac00e0cddda..40af33c6f28 100644 --- a/go/runtime/host/sandbox/process/seccomp_linux.go +++ b/go/runtime/host/sandbox/process/seccomp_linux.go @@ -355,5 +355,24 @@ func generateSeccompPolicy(out *os.File) error { return err } + // We need to handle the clone3 syscall in a special manner as there are several complications + // to its handling: + // + // - Newer glibc versions will try clone3 first and if they see EPERM they will instantly fail + // making the program unable to spawn threads. + // + // - The clone3 syscall is much more complex than clone and so we can't simply inspect its flags + // as above for clone. + // + // Therefore we need to reject the syscall with ENOSYS, causing fallback to clone. + clone3ID, err := seccomp.GetSyscallFromName("clone3") + if err != nil { + return err + } + err = filter.AddRule(clone3ID, seccomp.ActErrno.SetReturnCode(int16(syscall.ENOSYS))) + if err != nil { + return err + } + return filter.ExportBPF(out) }