From 23e02c7927c352cb4d865414e68f3a776ff6acc8 Mon Sep 17 00:00:00 2001 From: Alejandro Pedraza Date: Thu, 23 Jan 2025 07:18:18 -0500 Subject: [PATCH] fix(multicluster)!: Link's probeSpec.period should be formatted as duration (#13586) Before the refactoring from #13420, `link mc link` would generate a Link resource with `probeSpec.period` formatted as duration. After that change, it was generated as an integer and the service mirror was refactored as well to consume that integer and convert it into a duration. If a Link with the old semantics is consumed by a service mirror controller containing that change (or vice-versa) it will error out with "could not parse probe period". This could happen for example when re-linking as part of a multicluster upgrade, when pending the rollout the old controller would consume the upgraded Link. Or for folks upgrading just by bumping the controller image tag and not re-linking (unwise as that may be). This fix consists on: - Updating the Link CRD, constraining `period` to be formatted as duration - Having the `linkerd mc link` command produce a duration value for that period, being lenient to the gateway service annotation `mirror.linkerd.io/probe-period` either using an integer (which it does by default) or a duration - Having the probe worker consume that value as a duration BREAKING CHANGE: this realigns behavior with edge-24.11.8, but Links generated with edge-25.1.1 would be rejected. --- .../linkerd-multicluster/templates/link-crd.yaml | 1 + multicluster/cmd/link.go | 15 ++++++++++++++- multicluster/cmd/testdata/install_default.golden | 1 + multicluster/cmd/testdata/install_ha.golden | 1 + multicluster/cmd/testdata/install_psp.golden | 1 + multicluster/service-mirror/probe_worker.go | 3 +-- 6 files changed, 19 insertions(+), 3 deletions(-) diff --git a/multicluster/charts/linkerd-multicluster/templates/link-crd.yaml b/multicluster/charts/linkerd-multicluster/templates/link-crd.yaml index e2b8a0c7a3fc6..d24eb02d18c4a 100644 --- a/multicluster/charts/linkerd-multicluster/templates/link-crd.yaml +++ b/multicluster/charts/linkerd-multicluster/templates/link-crd.yaml @@ -158,6 +158,7 @@ spec: type: string period: description: Interval in between probe requests + format: duration type: string port: description: Port of remote gateway health endpoint diff --git a/multicluster/cmd/link.go b/multicluster/cmd/link.go index 045d2d5070578..117af4cb97324 100644 --- a/multicluster/cmd/link.go +++ b/multicluster/cmd/link.go @@ -7,7 +7,9 @@ import ( "fmt" "os" "path" + "strconv" "strings" + "time" "github.com/linkerd/linkerd2/controller/gen/apis/link/v1alpha2" "github.com/linkerd/linkerd2/multicluster/static" @@ -599,10 +601,21 @@ func extractProbeSpec(gateway *corev1.Service) (v1alpha2.ProbeSpec, error) { return v1alpha2.ProbeSpec{}, err } + // the `mirror.linkerd.io/probe-period` annotation is initialized with a + // default value of "3", but we require a duration-formatted string. So we + // perform the conversion, if required. + period := gateway.Annotations[k8s.GatewayProbePeriod] + if secs, err := strconv.ParseInt(period, 10, 64); err == nil { + dur := time.Duration(secs) * time.Second + period = dur.String() + } else if _, err := time.ParseDuration(period); err != nil { + return v1alpha2.ProbeSpec{}, fmt.Errorf("could not parse probe period: %w", err) + } + return v1alpha2.ProbeSpec{ Path: path, Port: fmt.Sprintf("%d", port), - Period: gateway.Annotations[k8s.GatewayProbePeriod], + Period: period, }, nil } diff --git a/multicluster/cmd/testdata/install_default.golden b/multicluster/cmd/testdata/install_default.golden index 32b4e1aea736e..ec7610c0aa543 100644 --- a/multicluster/cmd/testdata/install_default.golden +++ b/multicluster/cmd/testdata/install_default.golden @@ -396,6 +396,7 @@ spec: type: string period: description: Interval in between probe requests + format: duration type: string port: description: Port of remote gateway health endpoint diff --git a/multicluster/cmd/testdata/install_ha.golden b/multicluster/cmd/testdata/install_ha.golden index 88488f7879cd5..cb2a4b747a48c 100644 --- a/multicluster/cmd/testdata/install_ha.golden +++ b/multicluster/cmd/testdata/install_ha.golden @@ -468,6 +468,7 @@ spec: type: string period: description: Interval in between probe requests + format: duration type: string port: description: Port of remote gateway health endpoint diff --git a/multicluster/cmd/testdata/install_psp.golden b/multicluster/cmd/testdata/install_psp.golden index 7a91e393f025b..0542a4033f3a8 100644 --- a/multicluster/cmd/testdata/install_psp.golden +++ b/multicluster/cmd/testdata/install_psp.golden @@ -430,6 +430,7 @@ spec: type: string period: description: Interval in between probe requests + format: duration type: string port: description: Port of remote gateway health endpoint diff --git a/multicluster/service-mirror/probe_worker.go b/multicluster/service-mirror/probe_worker.go index 42e9989f19342..09f10940a6ccc 100644 --- a/multicluster/service-mirror/probe_worker.go +++ b/multicluster/service-mirror/probe_worker.go @@ -70,12 +70,11 @@ func (pw *ProbeWorker) run() { pw.log.Error("Probe spec is nil") return } - probeTickerPeriodSeconds, err := strconv.ParseInt(pw.probeSpec.Period, 10, 64) + probeTickerPeriod, err := time.ParseDuration(pw.probeSpec.Period) if err != nil { pw.log.Errorf("could not parse probe period: %s", err) return } - probeTickerPeriod := time.Duration(probeTickerPeriodSeconds) * time.Second maxJitter := probeTickerPeriod / 10 // max jitter is 10% of period probeTicker := NewTicker(probeTickerPeriod, maxJitter) defer probeTicker.Stop()