From 0bdacd5af82095fe62c2de5331574b242ebb9532 Mon Sep 17 00:00:00 2001 From: Krzesimir Nowak Date: Fri, 26 Aug 2022 15:22:47 +0200 Subject: [PATCH 1/6] api/equinixmetal: Clean up device destruction I think we forgot to destroy the device in one place. Instead of trying to remember to add the destruction in every fail case, just destroy the device by default and skip doing it only if everything succeeds. --- platform/api/equinixmetal/api.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/platform/api/equinixmetal/api.go b/platform/api/equinixmetal/api.go index bb82facb3..4da46cb88 100644 --- a/platform/api/equinixmetal/api.go +++ b/platform/api/equinixmetal/api.go @@ -294,7 +294,13 @@ func (a *API) CreateOrUpdateDevice(hostname string, conf *conf.Conf, console Con if err != nil { return nil, fmt.Errorf("couldn't create device: %v", err) } + destroyDevice := true deviceID := device.ID + defer func() { + if destroyDevice { + a.DeleteDevice(deviceID) + } + }() plog.Debugf("Created device: %q", deviceID) @@ -302,20 +308,17 @@ func (a *API) CreateOrUpdateDevice(hostname string, conf *conf.Conf, console Con err := a.startConsole(deviceID, device.Facility.Code, console) consoleStarted = true if err != nil { - a.DeleteDevice(deviceID) return nil, err } } device, err = a.waitForActive(deviceID) if err != nil { - a.DeleteDevice(deviceID) return nil, err } ipAddress := a.GetDeviceAddress(device, 4, true) if ipAddress == "" { - a.DeleteDevice(deviceID) return nil, fmt.Errorf("no public IP address found for %v", deviceID) } @@ -323,7 +326,6 @@ func (a *API) CreateOrUpdateDevice(hostname string, conf *conf.Conf, console Con err = waitForInstall(ipAddress) if err != nil { - a.DeleteDevice(deviceID) return nil, fmt.Errorf("timed out waiting for flatcar-install: %v", err) } @@ -338,6 +340,7 @@ func (a *API) CreateOrUpdateDevice(hostname string, conf *conf.Conf, console Con plog.Debugf("Finished installation of device: %q", deviceID) + destroyDevice = 
false return device, nil } From 0e6ac42255e47e9887f0231af703afdcee3caf52 Mon Sep 17 00:00:00 2001 From: Krzesimir Nowak Date: Mon, 29 Aug 2022 14:15:28 +0200 Subject: [PATCH 2/6] api/equinixmetal: Exclude devices with block extended major I don't know if NVMe disks are a recent addition to s3.xlarge.x86 instances, or maybe those disks got shrunk, but the flatcar-install script was picking one of them to install flatcar on. For some reason, the boot agent is not able to boot from them, maybe there is some additional setup needed, which is missing. Fortunately the script already had an option of excluding devices by major numbers, so use this functionality. That way, the script installs the OS on one of `/dev/sdX` disks, which are bootable. For device majors see the kernel documentation at: https://www.kernel.org/doc/Documentation/admin-guide/devices.txt --- platform/api/equinixmetal/api.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/platform/api/equinixmetal/api.go b/platform/api/equinixmetal/api.go index 4da46cb88..2b07d3ef3 100644 --- a/platform/api/equinixmetal/api.go +++ b/platform/api/equinixmetal/api.go @@ -444,7 +444,9 @@ ExecStart=/usr/bin/curl --retry-delay 1 --retry 120 --retry-connrefused --retry- ExecStartPre=-/bin/bash -c 'lvchange -an /dev/mapper/*' ExecStartPre=-/bin/bash -c 'shopt -s nullglob; for disk in /dev/*d? /dev/nvme?n1; do wipefs --all --force $${disk}; done' -ExecStart=/usr/bin/flatcar-install -s -f image.bin.bz2 %v /userdata +# 259 is a major number of NVMe devices. They need to be excluded, because +# the boot agent can't boot from them. 
+ExecStart=/usr/bin/flatcar-install -s -e 259 -f image.bin.bz2 %v /userdata ExecStart=/usr/bin/systemctl --no-block isolate reboot.target From 6be664ee3b5e5b879a6014ba39f1a692f304c099 Mon Sep 17 00:00:00 2001 From: Krzesimir Nowak Date: Tue, 30 Aug 2022 08:57:48 +0200 Subject: [PATCH 3/6] changelog: Add an entry --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9889dfb40..386466ac9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,6 +49,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Fixed - Fix version check in kubeadm tests ([#353](https://github.com/flatcar-linux/mantle/pull/353)) - Make Calico testing in kubeadm tests more reliable ([#359](https://github.com/flatcar-linux/mantle/pull/359)) +- Fix running tests on Equinix Metal s3.xlarge.x86 instances ([#364](https://github.com/flatcar-linux/mantle/pull/364)) ## [0.18.0] - 12/01/2022 ### Security From d8b8eb307d1c3b42bc31568f706732f7fa3746ee Mon Sep 17 00:00:00 2001 From: Mathieu Tortuyaux Date: Tue, 30 Aug 2022 15:27:05 +0200 Subject: [PATCH 4/6] kubeadm/cilium: bump CLI and tested Cilium version Starting from Cilium 1.12, {live,ready}ness probes are on :9879 while it was on :9876 on older versions. (See: https://github.com/cilium/cilium/commit/22cd47ef496be1c0b78ff8d146b2240810e78978) CLI made this change on versions greater than or equal to 1.10.12 (See: https://github.com/cilium/cilium-cli/pull/869/files) - it results in a port mismatch 9879/9876 if we test the version 1.11.5. Basically Cilium is running fine but its status is not ready/healthy from a Kubernetes PoV. 
Signed-off-by: Mathieu Tortuyaux --- kola/tests/kubeadm/kubeadm.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kola/tests/kubeadm/kubeadm.go b/kola/tests/kubeadm/kubeadm.go index 4ffb67702..b374fa669 100644 --- a/kola/tests/kubeadm/kubeadm.go +++ b/kola/tests/kubeadm/kubeadm.go @@ -73,8 +73,8 @@ var ( testConfig = map[string]map[string]interface{}{ "v1.24.1": map[string]interface{}{ "FlannelVersion": "v0.18.1", - "CiliumVersion": "1.11.5", - "CiliumCLIVersion": "v0.10.7", + "CiliumVersion": "1.12.1", + "CiliumCLIVersion": "v0.12.2", "CNIVersion": "v1.1.1", "CRIctlVersion": "v1.24.2", "ReleaseVersion": "v0.13.0", @@ -98,8 +98,8 @@ var ( }, "v1.23.4": map[string]interface{}{ "FlannelVersion": "v0.16.3", - "CiliumVersion": "1.11.2", - "CiliumCLIVersion": "v0.10.2", + "CiliumVersion": "1.12.1", + "CiliumCLIVersion": "v0.12.2", "CNIVersion": "v1.0.1", "CRIctlVersion": "v1.22.0", "ReleaseVersion": "v0.4.0", @@ -123,8 +123,8 @@ var ( }, "v1.22.7": map[string]interface{}{ "FlannelVersion": "v0.16.3", - "CiliumVersion": "1.11.2", - "CiliumCLIVersion": "v0.10.2", + "CiliumVersion": "1.12.1", + "CiliumCLIVersion": "v0.12.2", "CNIVersion": "v1.0.1", "CRIctlVersion": "v1.22.0", "ReleaseVersion": "v0.4.0", From 39761f3b58e08c14f374813b734701bd2cde1243 Mon Sep 17 00:00:00 2001 From: Mathieu Tortuyaux Date: Tue, 30 Aug 2022 15:34:01 +0200 Subject: [PATCH 5/6] kubeadm/cilium: patch Cilium daemon set This is required even with Permissive mode. Can be dropped once `spc_t` is supported on Flatcar. 
Signed-off-by: Mathieu Tortuyaux --- kola/tests/kubeadm/kubeadm.go | 8 ++++++-- kola/tests/kubeadm/templates.go | 1 + kola/tests/kubeadm/testdata/master-cilium-script.sh | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/kola/tests/kubeadm/kubeadm.go b/kola/tests/kubeadm/kubeadm.go index b374fa669..d92ada485 100644 --- a/kola/tests/kubeadm/kubeadm.go +++ b/kola/tests/kubeadm/kubeadm.go @@ -54,8 +54,12 @@ var ( _ = c.MustSSH(controller, "/opt/bin/cilium uninstall") version := params["CiliumVersion"].(string) cidr := params["PodSubnet"].(string) - cmd := fmt.Sprintf("/opt/bin/cilium install --config enable-endpoint-routes=true --config cluster-pool-ipv4-cidr=%s --version=%s --encryption=ipsec --wait --wait-duration 1m", cidr, version) - _ = c.MustSSH(controller, cmd) + cmd := fmt.Sprintf("/opt/bin/cilium install --config enable-endpoint-routes=true --config cluster-pool-ipv4-cidr=%s --version=%s --encryption=ipsec --wait-duration=1s --rollback=false", cidr, version) + _, _ = c.SSH(controller, cmd) + patch := `/opt/bin/kubectl --namespace kube-system patch daemonset/cilium -p '{"spec":{"template":{"spec":{"containers":[{"name":"cilium-agent","securityContext":{"seLinuxOptions":{"level":"s0","type":"unconfined_t"}}}],"initContainers":[{"name":"mount-cgroup","securityContext":{"seLinuxOptions":{"level":"s0","type":"unconfined_t"}}},{"name":"apply-sysctl-overwrites","securityContext":{"seLinuxOptions":{"level":"s0","type":"unconfined_t"}}},{"name":"clean-cilium-state","securityContext":{"seLinuxOptions":{"level":"s0","type":"unconfined_t"}}}]}}}}'` + _ = c.MustSSH(controller, patch) + status := "/opt/bin/cilium status --wait --wait-duration 1m" + _ = c.MustSSH(controller, status) }, }, }, diff --git a/kola/tests/kubeadm/templates.go b/kola/tests/kubeadm/templates.go index 1538b61a1..efe09b343 100644 --- a/kola/tests/kubeadm/templates.go +++ b/kola/tests/kubeadm/templates.go @@ -401,6 +401,7 @@ EOF --config enable-endpoint-routes=true \ --config 
cluster-pool-ipv4-cidr={{ .PodSubnet }} \ --version={{ .CiliumVersion }} 2>&1 | iconv --from-code utf-8 --to-code ascii//TRANSLIT + kubectl --namespace kube-system patch daemonset/cilium -p '{"spec":{"template":{"spec":{"containers":[{"name":"cilium-agent","securityContext":{"seLinuxOptions":{"level":"s0","type":"unconfined_t"}}}],"initContainers":[{"name":"mount-cgroup","securityContext":{"seLinuxOptions":{"level":"s0","type":"unconfined_t"}}},{"name":"apply-sysctl-overwrites","securityContext":{"seLinuxOptions":{"level":"s0","type":"unconfined_t"}}},{"name":"clean-cilium-state","securityContext":{"seLinuxOptions":{"level":"s0","type":"unconfined_t"}}}]}}}}' # --wait will wait for status to report success /opt/bin/cilium status --wait 2>&1 | iconv --from-code utf-8 --to-code ascii//TRANSLIT {{ end }} diff --git a/kola/tests/kubeadm/testdata/master-cilium-script.sh b/kola/tests/kubeadm/testdata/master-cilium-script.sh index 72c099e77..7964f8930 100644 --- a/kola/tests/kubeadm/testdata/master-cilium-script.sh +++ b/kola/tests/kubeadm/testdata/master-cilium-script.sh @@ -91,6 +91,7 @@ EOF --config enable-endpoint-routes=true \ --config cluster-pool-ipv4-cidr=192.168.0.0/17 \ --version=v0.11.1 2>&1 | iconv --from-code utf-8 --to-code ascii//TRANSLIT + kubectl --namespace kube-system patch daemonset/cilium -p '{"spec":{"template":{"spec":{"containers":[{"name":"cilium-agent","securityContext":{"seLinuxOptions":{"level":"s0","type":"unconfined_t"}}}],"initContainers":[{"name":"mount-cgroup","securityContext":{"seLinuxOptions":{"level":"s0","type":"unconfined_t"}}},{"name":"apply-sysctl-overwrites","securityContext":{"seLinuxOptions":{"level":"s0","type":"unconfined_t"}}},{"name":"clean-cilium-state","securityContext":{"seLinuxOptions":{"level":"s0","type":"unconfined_t"}}}]}}}}' # --wait will wait for status to report success /opt/bin/cilium status --wait 2>&1 | iconv --from-code utf-8 --to-code ascii//TRANSLIT From 6e68db3c2340c51e14830c4327a65df2cc3d8ff9 Mon Sep 17 
00:00:00 2001 From: Mathieu Tortuyaux Date: Tue, 30 Aug 2022 15:35:28 +0200 Subject: [PATCH 6/6] kubeadm/cilium: disable SELinux for Cilium Once the daemon set is started with `unconfined_t` it does not work with enforced SELinux because it hits a denial when transitioning from `kernel_t` to `unconfined_t` (and this is normal because currently everything runs with `kernel_t`, including container runtimes). Can be dropped once it works fine with `spc_t` label with the SELinux upgrade. Signed-off-by: Mathieu Tortuyaux --- kola/tests/kubeadm/kubeadm.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kola/tests/kubeadm/kubeadm.go b/kola/tests/kubeadm/kubeadm.go index d92ada485..deb9b5bf8 100644 --- a/kola/tests/kubeadm/kubeadm.go +++ b/kola/tests/kubeadm/kubeadm.go @@ -186,7 +186,7 @@ func init() { major = 3140 } - if CNI == "flannel" { + if CNI == "flannel" || CNI == "cilium" { flags = append(flags, register.NoEnableSelinux) }