diff --git a/README.md b/README.md index 59077cd86ab..347a6e45935 100644 --- a/README.md +++ b/README.md @@ -199,10 +199,22 @@ task talos:apply-node IP=? MODE=? # e.g. task talos:apply-node IP=10.10.10.10 MODE=auto ``` -### ⬆️ Updating Talos and Kubernetes versions +### ⬆️ Upgrading Talos and Kubernetes versions +#### Method 1: System Upgrade Controller (SUC) + +> [!IMPORTANT] +> In order to upgrade make sure `TALOS_VERSION` and `KUBERNETES_VERSION` in `kubernetes/apps/kube-system/system-upgrade/ks.yaml` are set to the versions you wish to upgrade to. Once your cluster receives this configuration the upgrade processes will kick off in the `kube-system` namespace. These versions are under the watch of Renovate, which means once the pull request is merged SUC will attempt to upgrade Kubernetes / Talos and reboot. + +Talos and Kubernetes upgrades should be handled via the [rancher/system-upgrade-controller](https://github.com/rancher/system-upgrade-controller) which is deployed in the `kube-system` namespace. + +#### Method 2: Taskfile + +> [!WARNING] +> Upgrading via this method can interfere with the System Upgrade Controller. SUC could potentially downgrade Talos or Kubernetes versions if care is not taken. +--- > [!IMPORTANT] -> Ensure the `talosVersion` and `kubernetesVersion` in `talconfig.yaml` are up-to-date with the version you wish to upgrade to. +> In order to upgrade make sure `talosVersion` and `kubernetesVersion` in `talconfig.yaml` are set to the versions you wish to upgrade to. 
```sh # Upgrade node to a newer Talos version diff --git a/templates/config/kubernetes/apps/default/echo-server/app/helmrelease.yaml.j2 b/templates/config/kubernetes/apps/default/echo-server/app/helmrelease.yaml.j2 index ee8298d3901..b611cfa22a7 100644 --- a/templates/config/kubernetes/apps/default/echo-server/app/helmrelease.yaml.j2 +++ b/templates/config/kubernetes/apps/default/echo-server/app/helmrelease.yaml.j2 @@ -65,7 +65,6 @@ spec: runAsNonRoot: true runAsUser: 65534 runAsGroup: 65534 - seccompProfile: { type: RuntimeDefault } service: app: controller: echo-server diff --git a/templates/config/kubernetes/apps/kube-system/kustomization.yaml.j2 b/templates/config/kubernetes/apps/kube-system/kustomization.yaml.j2 index 379b674a04f..ac4ecd3c8dc 100644 --- a/templates/config/kubernetes/apps/kube-system/kustomization.yaml.j2 +++ b/templates/config/kubernetes/apps/kube-system/kustomization.yaml.j2 @@ -11,3 +11,4 @@ resources: - ./metrics-server/ks.yaml - ./reloader/ks.yaml - ./spegel/ks.yaml + - ./system-upgrade/ks.yaml diff --git a/templates/config/kubernetes/apps/kube-system/system-upgrade/app/helmrelease.yaml.j2 b/templates/config/kubernetes/apps/kube-system/system-upgrade/app/helmrelease.yaml.j2 new file mode 100644 index 00000000000..9286d864596 --- /dev/null +++ b/templates/config/kubernetes/apps/kube-system/system-upgrade/app/helmrelease.yaml.j2 @@ -0,0 +1,64 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/bjw-s/helm-charts/main/charts/other/app-template/schemas/helmrelease-helm-v2.schema.json +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: &app system-upgrade +spec: + interval: 30m + chart: + spec: + chart: app-template + version: 3.7.1 + sourceRef: + kind: HelmRepository + name: bjw-s + namespace: flux-system + install: + remediation: + retries: 3 + upgrade: + cleanupOnFail: true + remediation: + strategy: rollback + retries: 3 + values: + controllers: + system-upgrade: + strategy: RollingUpdate + 
containers: + app: + image: + repository: docker.io/rancher/system-upgrade-controller + tag: v0.15.0-rc2@sha256:d6faa9cb5123ae14cfbf0e9e22fa5302e1369649a6f1d117874c30a2a8df732b + env: + SYSTEM_UPGRADE_CONTROLLER_NAME: *app + SYSTEM_UPGRADE_CONTROLLER_NAMESPACE: + valueFrom: + fieldRef: + fieldPath: metadata.namespace + SYSTEM_UPGRADE_JOB_BACKOFF_LIMIT: "99" + SYSTEM_UPGRADE_JOB_PRIVILEGED: false + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: { drop: ["ALL"] } + defaultPodOptions: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/control-plane + operator: Exists + securityContext: + runAsNonRoot: true + runAsUser: 65534 + runAsGroup: 65534 + tolerations: + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + serviceAccount: + name: *app + create: true diff --git a/templates/config/kubernetes/apps/kube-system/system-upgrade/app/kustomization.yaml.j2 b/templates/config/kubernetes/apps/kube-system/system-upgrade/app/kustomization.yaml.j2 new file mode 100644 index 00000000000..adb2a4f6dcc --- /dev/null +++ b/templates/config/kubernetes/apps/kube-system/system-upgrade/app/kustomization.yaml.j2 @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./helmrelease.yaml + - ./rbac.yaml diff --git a/templates/config/kubernetes/apps/kube-system/system-upgrade/app/rbac.yaml.j2 b/templates/config/kubernetes/apps/kube-system/system-upgrade/app/rbac.yaml.j2 new file mode 100644 index 00000000000..6d083d6d56d --- /dev/null +++ b/templates/config/kubernetes/apps/kube-system/system-upgrade/app/rbac.yaml.j2 @@ -0,0 +1,20 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: system-upgrade +roleRef: + apiGroup: 
rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-admin +subjects: + - kind: ServiceAccount + name: system-upgrade + namespace: kube-system +--- +apiVersion: talos.dev/v1alpha1 +kind: ServiceAccount +metadata: + name: system-upgrade +spec: + roles: ["os:admin"] diff --git a/templates/config/kubernetes/apps/kube-system/system-upgrade/ks.yaml.j2 b/templates/config/kubernetes/apps/kube-system/system-upgrade/ks.yaml.j2 new file mode 100644 index 00000000000..decef188bcf --- /dev/null +++ b/templates/config/kubernetes/apps/kube-system/system-upgrade/ks.yaml.j2 @@ -0,0 +1,51 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: &app system-upgrade + namespace: &namespace kube-system +spec: + commonMetadata: + labels: + app.kubernetes.io/name: *app + interval: 30m + path: ./kubernetes/apps/kube-system/system-upgrade/app + prune: true + sourceRef: + kind: GitRepository + name: flux-system + namespace: flux-system + targetNamespace: *namespace + timeout: 5m + wait: true +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/fluxcd-community/flux2-schemas/main/kustomization-kustomize-v1.json +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: &app system-upgrade-plans + namespace: &namespace kube-system +spec: + commonMetadata: + labels: + app.kubernetes.io/name: *app + dependsOn: + - name: system-upgrade + namespace: kube-system + interval: 30m + path: ./kubernetes/apps/kube-system/system-upgrade/plans + postBuild: + substitute: + # renovate: datasource=docker depName=ghcr.io/siderolabs/installer + TALOS_VERSION: v1.9.3 + # renovate: datasource=docker depName=ghcr.io/siderolabs/kubelet + KUBERNETES_VERSION: v1.32.2 + prune: true + sourceRef: + kind: GitRepository + name: flux-system + namespace: flux-system + targetNamespace: *namespace + 
timeout: 5m + wait: false diff --git a/templates/config/kubernetes/apps/kube-system/system-upgrade/plans/kubernetes.yaml.j2 b/templates/config/kubernetes/apps/kube-system/system-upgrade/plans/kubernetes.yaml.j2 new file mode 100644 index 00000000000..0b66e9efcda --- /dev/null +++ b/templates/config/kubernetes/apps/kube-system/system-upgrade/plans/kubernetes.yaml.j2 @@ -0,0 +1,25 @@ +--- +apiVersion: upgrade.cattle.io/v1 +kind: Plan +metadata: + name: kubernetes +spec: + version: ${KUBERNETES_VERSION} + concurrency: 1 + postCompleteDelay: 30s + exclusive: true + serviceAccountName: system-upgrade + secrets: + - name: system-upgrade + path: /var/run/secrets/talos.dev + ignoreUpdates: true + nodeSelector: + matchExpressions: + - key: node-role.kubernetes.io/control-plane + operator: Exists + upgrade: + image: ghcr.io/siderolabs/talosctl:${TALOS_VERSION} + args: + - --nodes=$(SYSTEM_UPGRADE_NODE_NAME) + - upgrade-k8s + - --to=$(SYSTEM_UPGRADE_PLAN_LATEST_VERSION) diff --git a/templates/config/kubernetes/apps/kube-system/system-upgrade/plans/kustomization.yaml.j2 b/templates/config/kubernetes/apps/kube-system/system-upgrade/plans/kustomization.yaml.j2 new file mode 100644 index 00000000000..061d8ad0d0e --- /dev/null +++ b/templates/config/kubernetes/apps/kube-system/system-upgrade/plans/kustomization.yaml.j2 @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://json.schemastore.org/kustomization +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./kubernetes.yaml + - ./talos.yaml diff --git a/templates/config/kubernetes/apps/kube-system/system-upgrade/plans/talos.yaml.j2 b/templates/config/kubernetes/apps/kube-system/system-upgrade/plans/talos.yaml.j2 new file mode 100644 index 00000000000..179f5eecb40 --- /dev/null +++ b/templates/config/kubernetes/apps/kube-system/system-upgrade/plans/talos.yaml.j2 @@ -0,0 +1,25 @@ +--- +apiVersion: upgrade.cattle.io/v1 +kind: Plan +metadata: + name: talos +spec: + version: ${TALOS_VERSION} + 
concurrency: 1 + postCompleteDelay: 2m + exclusive: true + serviceAccountName: system-upgrade + secrets: + - name: system-upgrade + path: /var/run/secrets/talos.dev + ignoreUpdates: true + nodeSelector: + matchExpressions: + - key: kubernetes.io/os + operator: In + values: ["linux"] + upgrade: + image: ghcr.io/jfroy/tnu:0.4.0 + args: + - --node=$(SYSTEM_UPGRADE_NODE_NAME) + - --tag=$(SYSTEM_UPGRADE_PLAN_LATEST_VERSION) diff --git a/templates/config/kubernetes/apps/network/external/cloudflared/helmrelease.yaml.j2 b/templates/config/kubernetes/apps/network/external/cloudflared/helmrelease.yaml.j2 index 4cfb0384f07..705c9836da5 100644 --- a/templates/config/kubernetes/apps/network/external/cloudflared/helmrelease.yaml.j2 +++ b/templates/config/kubernetes/apps/network/external/cloudflared/helmrelease.yaml.j2 @@ -77,7 +77,6 @@ spec: runAsNonRoot: true runAsUser: 65534 runAsGroup: 65534 - seccompProfile: { type: RuntimeDefault } service: app: controller: cloudflared diff --git a/templates/config/kubernetes/bootstrap/talos/patches/controller/machine-features.yaml.j2 b/templates/config/kubernetes/bootstrap/talos/patches/controller/machine-features.yaml.j2 new file mode 100644 index 00000000000..7a025911391 --- /dev/null +++ b/templates/config/kubernetes/bootstrap/talos/patches/controller/machine-features.yaml.j2 @@ -0,0 +1,6 @@ +machine: + features: + kubernetesTalosAPIAccess: + enabled: true + allowedRoles: ["os:admin"] + allowedKubernetesNamespaces: ["kube-system"]