From fee97789050efa4c516103430e2de1784167772a Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Thu, 28 Apr 2022 13:26:10 -0400 Subject: [PATCH] Fix ansible-operator finalizer concurrency issue (#5678) * Fix ansible-operator finalizer concurrency issue For ansible-based operators, this change fixes an issue that caused finalizers to fail to run if the watched resource (CR) is deleted during reconciliation. Fixes #4909 Signed-off-by: Austin Macdonald * dont change constants Signed-off-by: Austin Macdonald * change name so we dont have a half-made test Signed-off-by: Austin Macdonald * PR cleanup Signed-off-by: Austin Macdonald * rm extra --- Signed-off-by: Austin Macdonald --- Makefile | 12 +++- changelog/fragments/finalizerconcurrency.yaml | 11 ++++ .../internal/ansible/advanced_molecule.go | 39 ++++++++++++- .../samples/internal/ansible/constants.go | 1 - .../finalizerconcurrencyfinalizer.yml | 34 +++++++++++ .../tasks/finalizerconcurrencytest_test.yml | 58 +++++++++++++++++++ .../testdata/tasks/subresourcestest_test.yml | 2 - .../internal/ansible/testdata/watches.yaml | 10 ++++ internal/ansible/controller/reconcile.go | 6 +- 9 files changed, 164 insertions(+), 9 deletions(-) create mode 100644 changelog/fragments/finalizerconcurrency.yaml create mode 100644 hack/generate/samples/internal/ansible/testdata/playbooks/finalizerconcurrencyfinalizer.yml create mode 100644 hack/generate/samples/internal/ansible/testdata/tasks/finalizerconcurrencytest_test.yml diff --git a/Makefile b/Makefile index 0466faf6ca6..3818709c0e4 100644 --- a/Makefile +++ b/Makefile @@ -156,10 +156,16 @@ e2e_targets := test-e2e $(e2e_tests) export KIND_CLUSTER := osdk-test KUBEBUILDER_ASSETS = $(PWD)/$(shell go install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest && $(shell go env GOPATH)/bin/setup-envtest use $(ENVTEST_K8S_VERSION) --bin-dir tools/bin/ -p path) -test-e2e-setup: build +test-e2e-setup:: build dev-install cluster-create + +.PHONY: cluster-create +cluster-create:: + [[ "`$(TOOLS_DIR)/kind get clusters`" =~ "$(KIND_CLUSTER)" ]] || $(TOOLS_DIR)/kind create cluster --image="kindest/node:v$(ENVTEST_K8S_VERSION)" --name $(KIND_CLUSTER) + +.PHONY: dev-install +dev-install:: $(SCRIPTS_DIR)/fetch kind 0.11.0 $(SCRIPTS_DIR)/fetch kubectl $(ENVTEST_K8S_VERSION) # Install kubectl AFTER envtest because envtest includes its own kubectl binary - [[ "`$(TOOLS_DIR)/kind get clusters`" =~ "$(KIND_CLUSTER)" ]] || $(TOOLS_DIR)/kind create cluster --image="kindest/node:v$(ENVTEST_K8S_VERSION)" --name $(KIND_CLUSTER) .PHONY: test-e2e-teardown test-e2e-teardown: @@ -177,7 +183,7 @@ test-e2e-go:: image/custom-scorecard-tests ## Run Go e2e tests test-e2e-ansible:: image/ansible-operator ## Run Ansible e2e tests go test -count=1 ./internal/ansible/proxy/... go test ./test/e2e/ansible -v -ginkgo.v -test-e2e-ansible-molecule:: image/ansible-operator ## Run molecule-based Ansible e2e tests +test-e2e-ansible-molecule:: install dev-install image/ansible-operator ## Run molecule-based Ansible e2e tests go run ./hack/generate/samples/molecule/generate.go ./hack/tests/e2e-ansible-molecule.sh test-e2e-helm:: image/helm-operator ## Run Helm e2e tests diff --git a/changelog/fragments/finalizerconcurrency.yaml b/changelog/fragments/finalizerconcurrency.yaml new file mode 100644 index 00000000000..121e4cba677 --- /dev/null +++ b/changelog/fragments/finalizerconcurrency.yaml @@ -0,0 +1,11 @@ +# entries is a list of entries to include in +# release notes and/or the migration guide +entries: + - description: > + For ansible-based operators, this change fixes an issue that caused + finalizers to fail to run if the watched resource (CR) is deleted during + reconciliation. + kind: "bugfix" + + # Is this a breaking change? + breaking: false diff --git a/hack/generate/samples/internal/ansible/advanced_molecule.go b/hack/generate/samples/internal/ansible/advanced_molecule.go index f603e2bb76f..0cba602e8fe 100644 --- a/hack/generate/samples/internal/ansible/advanced_molecule.go +++ b/hack/generate/samples/internal/ansible/advanced_molecule.go @@ -235,6 +235,12 @@ func (ma *AdvancedMolecule) addMocksFromTestdata() { cmd = exec.Command("cp", "-r", "../../../hack/generate/samples/internal/ansible/testdata/inventory/", filepath.Join(ma.ctx.Dir, "inventory/")) _, err = ma.ctx.Run(cmd) pkg.CheckError("adding inventory/", err) + + log.Infof("adding finalizer for finalizerconcurrencytest") + cmd = exec.Command("cp", "../../../hack/generate/samples/internal/ansible/testdata/playbooks/finalizerconcurrencyfinalizer.yml", filepath.Join(ma.ctx.Dir, "playbooks/finalizerconcurrencyfinalizer.yml")) + _, err = ma.ctx.Run(cmd) + pkg.CheckError("adding finalizer for finalizerconccurencytest", err) + } func (ma *AdvancedMolecule) updateDockerfile() { @@ -456,8 +462,6 @@ func (ma *AdvancedMolecule) updatePlaybooks() { command: '{{ exec_command }}' register: exec_result - - debug: var=exec_result - - name: Get logs from busybox pod k8s_log: name: '{{ meta.name }}-busybox' @@ -516,6 +520,36 @@ func (ma *AdvancedMolecule) updatePlaybooks() { clusterAnnotationTest) pkg.CheckError("adding playbook for clusterannotationtest", err) + log.Infof("adding playbook for finalizerconcurrencytest") + const finalizerConcurrencyTest = `--- +- hosts: localhost + gather_facts: no + collections: + - kubernetes.core + - operator_sdk.util + + tasks: + - debug: + msg: "Pausing until configmap exists" + + - name: Wait for configmap + k8s_info: + apiVersion: v1 + kind: ConfigMap + name: unpause-reconciliation + namespace: osdk-test + wait: yes + wait_sleep: 10 + wait_timeout: 360 + + - debug: + msg: "Unpause!" +` + err = kbutil.ReplaceInFile( + filepath.Join(ma.ctx.Dir, "playbooks", "finalizerconcurrencytest.yml"), + originalPlaybookFragment, + finalizerConcurrencyTest) + pkg.CheckError("adding playbook for finalizerconcurrencytest", err) } func (ma *AdvancedMolecule) addPlaybooks() { @@ -524,6 +558,7 @@ func (ma *AdvancedMolecule) addPlaybooks() { "CaseTest", "CollectionTest", "ClusterAnnotationTest", + "FinalizerConcurrencyTest", "ReconciliationTest", "SelectorTest", "SubresourcesTest", diff --git a/hack/generate/samples/internal/ansible/constants.go b/hack/generate/samples/internal/ansible/constants.go index 1a3b463dc3b..17958babe69 100644 --- a/hack/generate/samples/internal/ansible/constants.go +++ b/hack/generate/samples/internal/ansible/constants.go @@ -418,7 +418,6 @@ const targetMoleculeCheckDeployment = `- name: Wait 2 minutes for memcached depl )}}'` const molecuTaskToCheckConfigMap = ` - - name: Create ConfigMap that the Operator should delete k8s: definition: diff --git a/hack/generate/samples/internal/ansible/testdata/playbooks/finalizerconcurrencyfinalizer.yml b/hack/generate/samples/internal/ansible/testdata/playbooks/finalizerconcurrencyfinalizer.yml new file mode 100644 index 00000000000..2cc57f3b112 --- /dev/null +++ b/hack/generate/samples/internal/ansible/testdata/playbooks/finalizerconcurrencyfinalizer.yml @@ -0,0 +1,34 @@ +--- +- hosts: localhost + gather_facts: no + collections: + - kubernetes.core + - operator_sdk.util + + tasks: + - debug: + msg: "Pausing until configmap exists" + + - name: Wait for configmap + k8s_info: + api_version: v1 + kind: ConfigMap + name: finalizer-concurrency-results + namespace: osdk-test + wait: yes + wait_sleep: 10 + wait_timeout: 30 + + - name: Update configmap + k8s: + state: present + force: yes + definition: + apiVersion: v1 + kind: ConfigMap + metadata: + name: finalizer-concurrency-results + namespace: osdk-test + data: + finalizer: "success" + wait: yes diff --git a/hack/generate/samples/internal/ansible/testdata/tasks/finalizerconcurrencytest_test.yml b/hack/generate/samples/internal/ansible/testdata/tasks/finalizerconcurrencytest_test.yml new file mode 100644 index 00000000000..8131669c6c8 --- /dev/null +++ b/hack/generate/samples/internal/ansible/testdata/tasks/finalizerconcurrencytest_test.yml @@ -0,0 +1,58 @@ +--- +# TODO(asmacdo) this should be the only task. the other is getting magiced in +- name: Create the test.example.com/v1alpha1.FinalizerConcurrencyTest + k8s: + state: present + definition: + apiVersion: test.example.com/v1alpha1 + kind: FinalizerConcurrencyTest + metadata: + name: finalizer-concurrency-test + namespace: '{{ namespace }}' + wait: no + +- name: While reconcile is paused, delete the CR + k8s: + state: absent + definition: + apiVersion: test.example.com/v1alpha1 + kind: FinalizerConcurrencyTest + metadata: + name: finalizer-concurrency-test + namespace: '{{ namespace }}' + wait: no + +- name: Create a configmap to allow reconciliation to unpause + k8s: + state: present + definition: + apiVersion: v1 + kind: ConfigMap + metadata: + name: finalizer-concurrency-results + namespace: osdk-test + wait: no + +- name: Wait for the custom resource to be deleted + k8s_info: + api_version: test.example.com/v1alpha1 + kind: FinalizerConcurrencyTest + namespace: osdk-test # TODO(asmacdo) Fixme + name: finalizer-concurrency-test + register: cr + retries: 10 + delay: 6 + until: not cr.resources + failed_when: cr.resources + +- name: Retrive the cm + k8s_info: + api_version: v1 + kind: ConfigMap + name: finalizer-concurrency-results + namespace: osdk-test + register: finalizer_test + +- name: Assert that finalizer ran + assert: + that: finalizer_test.resources.0.data.finalizer== 'success' diff --git a/hack/generate/samples/internal/ansible/testdata/tasks/subresourcestest_test.yml b/hack/generate/samples/internal/ansible/testdata/tasks/subresourcestest_test.yml index 4a175a4e32e..7a12b945c83 100644 --- a/hack/generate/samples/internal/ansible/testdata/tasks/subresourcestest_test.yml +++ b/hack/generate/samples/internal/ansible/testdata/tasks/subresourcestest_test.yml @@ -18,8 +18,6 @@ status: "True" register: subresources_test -- debug: var=subresources_test - - name: Assert stdout and stderr are properly set in status assert: that: diff --git a/hack/generate/samples/internal/ansible/testdata/watches.yaml b/hack/generate/samples/internal/ansible/testdata/watches.yaml index c94cf3ea98e..1adfbd9fa48 100644 --- a/hack/generate/samples/internal/ansible/testdata/watches.yaml +++ b/hack/generate/samples/internal/ansible/testdata/watches.yaml @@ -71,4 +71,14 @@ watchClusterScopedResources: true vars: meta: '{{ ansible_operator_meta }}' + +- version: v1alpha1 + group: test.example.com + kind: FinalizerConcurrencyTest + playbook: playbooks/finalizerconcurrencytest.yml + finalizer: + name: test.example.com/finalizer + playbook: playbooks/finalizerconcurrencyfinalizer.yml + vars: + meta: '{{ ansible_operator_meta }}' #+kubebuilder:scaffold:watch diff --git a/internal/ansible/controller/reconcile.go b/internal/ansible/controller/reconcile.go index 45a259a4373..a85738f51c4 100644 --- a/internal/ansible/controller/reconcile.go +++ b/internal/ansible/controller/reconcile.go @@ -243,8 +243,9 @@ func (r *AnsibleOperatorReconciler) Reconcile(ctx context.Context, request recon // and do it at the end runSuccessful := len(failureMessages) == 0 + recentlyDeleted := u.GetDeletionTimestamp() != nil + // The finalizer has run successfully, time to remove it - deleted = u.GetDeletionTimestamp() != nil if deleted && finalizerExists && runSuccessful { controllerutil.RemoveFinalizer(u, finalizer) err := r.Client.Update(ctx, u) @@ -252,6 +253,9 @@ func (r *AnsibleOperatorReconciler) Reconcile(ctx context.Context, request recon logger.Error(err, "Failed to remove finalizer") return reconcileResult, err } + } else if recentlyDeleted && finalizerExists { + // If the CR was deleted after the reconcile began, we need to requeue for the finalizer. + reconcileResult.Requeue = true } if r.ManageStatus { errmark := r.markDone(ctx, request.NamespacedName, u, statusEvent, failureMessages)