From 5d3b1c38edd707ab4de356a0b8fbe6b403dae9ca Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Wed, 13 Apr 2022 13:29:09 -0400 Subject: [PATCH 1/5] Fix ansible-operator finalizer concurrency issue For ansible-based operators, this change fixes an issue that caused finalizers to fail to run if the watched resource (CR) is deleted during reconciliation. Fixes #4909 Signed-off-by: Austin Macdonald --- Makefile | 10 +++- changelog/fragments/finalizerconcurrency.yaml | 11 ++++ .../internal/ansible/advanced_molecule.go | 40 ++++++++++++- .../samples/internal/ansible/constants.go | 8 +-- .../finalizerconcurrencyfinalizer.yml | 34 +++++++++++ .../tasks/finalizerconcurrency_test.yml | 58 +++++++++++++++++++ .../testdata/tasks/subresourcestest_test.yml | 2 - .../internal/ansible/testdata/watches.yaml | 10 ++++ hack/tests/e2e-ansible-molecule.sh | 2 + internal/ansible/controller/reconcile.go | 6 +- .../molecule/default/tasks/memcached_test.yml | 2 - 11 files changed, 167 insertions(+), 16 deletions(-) create mode 100644 changelog/fragments/finalizerconcurrency.yaml create mode 100644 hack/generate/samples/internal/ansible/testdata/playbooks/finalizerconcurrencyfinalizer.yml create mode 100644 hack/generate/samples/internal/ansible/testdata/tasks/finalizerconcurrency_test.yml diff --git a/Makefile b/Makefile index 2e30f613fd7..696b08ffd9b 100644 --- a/Makefile +++ b/Makefile @@ -156,10 +156,14 @@ e2e_targets := test-e2e $(e2e_tests) export KIND_CLUSTER := osdk-test KUBEBUILDER_ASSETS = $(PWD)/$(shell go install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest && $(shell go env GOPATH)/bin/setup-envtest use $(ENVTEST_K8S_VERSION) --bin-dir tools/bin/ -p path) -test-e2e-setup: build +test-e2e-setup:: build dev-install cluster-create + +cluster-create:: + [[ "`$(TOOLS_DIR)/kind get clusters`" =~ "$(KIND_CLUSTER)" ]] || $(TOOLS_DIR)/kind create cluster --image="kindest/node:v$(ENVTEST_K8S_VERSION)" --name $(KIND_CLUSTER) + +dev-install:: $(SCRIPTS_DIR)/fetch kind 0.11.0 $(SCRIPTS_DIR)/fetch kubectl $(ENVTEST_K8S_VERSION) # Install kubectl AFTER envtest because envtest includes its own kubectl binary - [[ "`$(TOOLS_DIR)/kind get clusters`" =~ "$(KIND_CLUSTER)" ]] || $(TOOLS_DIR)/kind create cluster --image="kindest/node:v$(ENVTEST_K8S_VERSION)" --name $(KIND_CLUSTER) .PHONY: test-e2e-teardown test-e2e-teardown: @@ -177,7 +181,7 @@ test-e2e-go:: image/custom-scorecard-tests ## Run Go e2e tests test-e2e-ansible:: image/ansible-operator ## Run Ansible e2e tests go test -count=1 ./internal/ansible/proxy/... go test ./test/e2e/ansible -v -ginkgo.v -test-e2e-ansible-molecule:: image/ansible-operator ## Run molecule-based Ansible e2e tests +test-e2e-ansible-molecule:: install dev-install image/ansible-operator ## Run molecule-based Ansible e2e tests go run ./hack/generate/samples/molecule/generate.go ./hack/tests/e2e-ansible-molecule.sh test-e2e-helm:: image/helm-operator ## Run Helm e2e tests diff --git a/changelog/fragments/finalizerconcurrency.yaml b/changelog/fragments/finalizerconcurrency.yaml new file mode 100644 index 00000000000..121e4cba677 --- /dev/null +++ b/changelog/fragments/finalizerconcurrency.yaml @@ -0,0 +1,11 @@ +# entries is a list of entries to include in +# release notes and/or the migration guide +entries: + - description: > + For ansible-based operators, this change fixes an issue that caused + finalizers to fail to run if the watched resource (CR) is deleted during + reconciliation. + kind: "bugfix" + + # Is this a breaking change? + breaking: false diff --git a/hack/generate/samples/internal/ansible/advanced_molecule.go b/hack/generate/samples/internal/ansible/advanced_molecule.go index f603e2bb76f..5d257c5330a 100644 --- a/hack/generate/samples/internal/ansible/advanced_molecule.go +++ b/hack/generate/samples/internal/ansible/advanced_molecule.go @@ -235,6 +235,12 @@ func (ma *AdvancedMolecule) addMocksFromTestdata() { cmd = exec.Command("cp", "-r", "../../../hack/generate/samples/internal/ansible/testdata/inventory/", filepath.Join(ma.ctx.Dir, "inventory/")) _, err = ma.ctx.Run(cmd) pkg.CheckError("adding inventory/", err) + + log.Infof("adding finalizer for finalizerconcurrencytest") + cmd = exec.Command("cp", "../../../hack/generate/samples/internal/ansible/testdata/playbooks/finalizerconcurrencyfinalizer.yml", filepath.Join(ma.ctx.Dir, "playbooks/finalizerconcurrencyfinalizer.yml")) + _, err = ma.ctx.Run(cmd) + pkg.CheckError("adding finalizer for finalizerconccurencytest", err) + } func (ma *AdvancedMolecule) updateDockerfile() { @@ -456,8 +462,6 @@ func (ma *AdvancedMolecule) updatePlaybooks() { command: '{{ exec_command }}' register: exec_result - - debug: var=exec_result - - name: Get logs from busybox pod k8s_log: name: '{{ meta.name }}-busybox' @@ -516,6 +520,37 @@ func (ma *AdvancedMolecule) updatePlaybooks() { clusterAnnotationTest) pkg.CheckError("adding playbook for clusterannotationtest", err) + log.Infof("adding playbook for finalizerconcurrencytest") + const finalizerConcurrencyTest = `--- +--- +- hosts: localhost + gather_facts: no + collections: + - kubernetes.core + - operator_sdk.util + + tasks: + - debug: + msg: "Pausing until configmap exists" + + - name: Wait for configmap + k8s_info: + apiVersion: v1 + kind: ConfigMap + name: unpause-reconciliation + namespace: osdk-test + wait: yes + wait_sleep: 10 + wait_timeout: 360 + + - debug: + msg: "Unpause!" +` + err = kbutil.ReplaceInFile( + filepath.Join(ma.ctx.Dir, "playbooks", "finalizerconcurrencytest.yml"), + originalPlaybookFragment, + finalizerConcurrencyTest) + pkg.CheckError("adding playbook for finalizerconcurrencytest", err) } func (ma *AdvancedMolecule) addPlaybooks() { @@ -524,6 +559,7 @@ func (ma *AdvancedMolecule) addPlaybooks() { "CaseTest", "CollectionTest", "ClusterAnnotationTest", + "FinalizerConcurrencyTest", "ReconciliationTest", "SelectorTest", "SubresourcesTest", diff --git a/hack/generate/samples/internal/ansible/constants.go b/hack/generate/samples/internal/ansible/constants.go index 1a3b463dc3b..5e5f02c8789 100644 --- a/hack/generate/samples/internal/ansible/constants.go +++ b/hack/generate/samples/internal/ansible/constants.go @@ -204,8 +204,6 @@ const moleculeTaskFragment = `- name: Load CR status: "True" - name: Wait 2 minutes for memcached deployment - debug: - var: deploy until: - deploy is defined - deploy.status is defined @@ -400,8 +398,6 @@ const originaMemcachedMoleculeTask = `- name: Create the cache.example.com/v1alp fail_msg: FIXME Add real assertions for your operator` const targetMoleculeCheckDeployment = `- name: Wait 2 minutes for memcached deployment - debug: - var: deploy until: - deploy is defined - deploy.status is defined @@ -415,10 +411,10 @@ const targetMoleculeCheckDeployment = `- name: Wait 2 minutes for memcached depl api_version="apps/v1", namespace=namespace, label_selector="app=memcached" - )}}'` + )}}' +` const molecuTaskToCheckConfigMap = ` - - name: Create ConfigMap that the Operator should delete k8s: definition: diff --git a/hack/generate/samples/internal/ansible/testdata/playbooks/finalizerconcurrencyfinalizer.yml b/hack/generate/samples/internal/ansible/testdata/playbooks/finalizerconcurrencyfinalizer.yml new file mode 100644 index 00000000000..2cc57f3b112 --- /dev/null +++ b/hack/generate/samples/internal/ansible/testdata/playbooks/finalizerconcurrencyfinalizer.yml @@ -0,0 +1,34 @@ +--- +- hosts: localhost + gather_facts: no + collections: + - kubernetes.core + - operator_sdk.util + + tasks: + - debug: + msg: "Pausing until configmap exists" + + - name: Wait for configmap + k8s_info: + api_version: v1 + kind: ConfigMap + name: finalizer-concurrency-results + namespace: osdk-test + wait: yes + wait_sleep: 10 + wait_timeout: 30 + + - name: Update configmap + k8s: + state: present + force: yes + definition: + apiVersion: v1 + kind: ConfigMap + metadata: + name: finalizer-concurrency-results + namespace: osdk-test + data: + finalizer: "success" + wait: yes diff --git a/hack/generate/samples/internal/ansible/testdata/tasks/finalizerconcurrency_test.yml b/hack/generate/samples/internal/ansible/testdata/tasks/finalizerconcurrency_test.yml new file mode 100644 index 00000000000..8131669c6c8 --- /dev/null +++ b/hack/generate/samples/internal/ansible/testdata/tasks/finalizerconcurrency_test.yml @@ -0,0 +1,58 @@ +--- +# TODO(asmacdo) this should be the only task. the other is getting magiced in +- name: Create the test.example.com/v1alpha1.FinalizerConcurrencyTest + k8s: + state: present + definition: + apiVersion: test.example.com/v1alpha1 + kind: FinalizerConcurrencyTest + metadata: + name: finalizer-concurrency-test + namespace: '{{ namespace }}' + wait: no + +- name: While reconcile is paused, delete the CR + k8s: + state: absent + definition: + apiVersion: test.example.com/v1alpha1 + kind: FinalizerConcurrencyTest + metadata: + name: finalizer-concurrency-test + namespace: '{{ namespace }}' + wait: no + +- name: Create a configmap to allow reconciliation to unpause + k8s: + state: present + definition: + apiVersion: v1 + kind: ConfigMap + metadata: + name: finalizer-concurrency-results + namespace: osdk-test + wait: no + +- name: Wait for the custom resource to be deleted + k8s_info: + api_version: test.example.com/v1alpha1 + kind: FinalizerConcurrencyTest + namespace: osdk-test # TODO(asmacdo) Fixme + name: finalizer-concurrency-test + register: cr + retries: 10 + delay: 6 + until: not cr.resources + failed_when: cr.resources + +- name: Retrive the cm + k8s_info: + api_version: v1 + kind: ConfigMap + name: finalizer-concurrency-results + namespace: osdk-test + register: finalizer_test + +- name: Assert that finalizer ran + assert: + that: finalizer_test.resources.0.data.finalizer== 'success' diff --git a/hack/generate/samples/internal/ansible/testdata/tasks/subresourcestest_test.yml b/hack/generate/samples/internal/ansible/testdata/tasks/subresourcestest_test.yml index 4a175a4e32e..7a12b945c83 100644 --- a/hack/generate/samples/internal/ansible/testdata/tasks/subresourcestest_test.yml +++ b/hack/generate/samples/internal/ansible/testdata/tasks/subresourcestest_test.yml @@ -18,8 +18,6 @@ status: "True" register: subresources_test -- debug: var=subresources_test - - name: Assert stdout and stderr are properly set in status assert: that: diff --git a/hack/generate/samples/internal/ansible/testdata/watches.yaml b/hack/generate/samples/internal/ansible/testdata/watches.yaml index c94cf3ea98e..1adfbd9fa48 100644 --- a/hack/generate/samples/internal/ansible/testdata/watches.yaml +++ b/hack/generate/samples/internal/ansible/testdata/watches.yaml @@ -71,4 +71,14 @@ watchClusterScopedResources: true vars: meta: '{{ ansible_operator_meta }}' + +- version: v1alpha1 + group: test.example.com + kind: FinalizerConcurrencyTest + playbook: playbooks/finalizerconcurrencytest.yml + finalizer: + name: test.example.com/finalizer + playbook: playbooks/finalizerconcurrencyfinalizer.yml + vars: + meta: '{{ ansible_operator_meta }}' #+kubebuilder:scaffold:watch diff --git a/hack/tests/e2e-ansible-molecule.sh b/hack/tests/e2e-ansible-molecule.sh index f2046b45567..b8f80059506 100755 --- a/hack/tests/e2e-ansible-molecule.sh +++ b/hack/tests/e2e-ansible-molecule.sh @@ -17,6 +17,8 @@ set -eu header_text "Running ansible molecule tests in a python3 virtual environment" + + # Set up a python3.8 virtual environment. ENVDIR="$(mktemp -d)" trap_add "set +u; deactivate; set -u; rm -rf $ENVDIR" EXIT diff --git a/internal/ansible/controller/reconcile.go b/internal/ansible/controller/reconcile.go index 45a259a4373..a85738f51c4 100644 --- a/internal/ansible/controller/reconcile.go +++ b/internal/ansible/controller/reconcile.go @@ -243,8 +243,9 @@ func (r *AnsibleOperatorReconciler) Reconcile(ctx context.Context, request recon // and do it at the end runSuccessful := len(failureMessages) == 0 + recentlyDeleted := u.GetDeletionTimestamp() != nil + // The finalizer has run successfully, time to remove it - deleted = u.GetDeletionTimestamp() != nil if deleted && finalizerExists && runSuccessful { controllerutil.RemoveFinalizer(u, finalizer) err := r.Client.Update(ctx, u) @@ -252,6 +253,9 @@ func (r *AnsibleOperatorReconciler) Reconcile(ctx context.Context, request recon logger.Error(err, "Failed to remove finalizer") return reconcileResult, err } + } else if recentlyDeleted && finalizerExists { + // If the CR was deleted after the reconcile began, we need to requeue for the finalizer. + reconcileResult.Requeue = true } if r.ManageStatus { errmark := r.markDone(ctx, request.NamespacedName, u, statusEvent, failureMessages) diff --git a/testdata/ansible/memcached-operator/molecule/default/tasks/memcached_test.yml b/testdata/ansible/memcached-operator/molecule/default/tasks/memcached_test.yml index b13cac0760a..abf3823068c 100644 --- a/testdata/ansible/memcached-operator/molecule/default/tasks/memcached_test.yml +++ b/testdata/ansible/memcached-operator/molecule/default/tasks/memcached_test.yml @@ -17,8 +17,6 @@ status: "True" - name: Wait 2 minutes for memcached deployment - debug: - var: deploy until: - deploy is defined - deploy.status is defined From 38f0029f1de8afe17e89c379f6ee0ad53f9530d1 Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 25 Apr 2022 11:28:39 -0400 Subject: [PATCH 2/5] dont change constants Signed-off-by: Austin Macdonald --- hack/generate/samples/internal/ansible/constants.go | 7 +++++-- .../molecule/default/tasks/memcached_test.yml | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/hack/generate/samples/internal/ansible/constants.go b/hack/generate/samples/internal/ansible/constants.go index 5e5f02c8789..17958babe69 100644 --- a/hack/generate/samples/internal/ansible/constants.go +++ b/hack/generate/samples/internal/ansible/constants.go @@ -204,6 +204,8 @@ const moleculeTaskFragment = `- name: Load CR status: "True" - name: Wait 2 minutes for memcached deployment + debug: + var: deploy until: - deploy is defined - deploy.status is defined @@ -398,6 +400,8 @@ const originaMemcachedMoleculeTask = `- name: Create the cache.example.com/v1alp fail_msg: FIXME Add real assertions for your operator` const targetMoleculeCheckDeployment = `- name: Wait 2 minutes for memcached deployment + debug: + var: deploy until: - deploy is defined - deploy.status is defined @@ -411,8 +415,7 @@ const targetMoleculeCheckDeployment = `- name: Wait 2 minutes for memcached depl api_version="apps/v1", namespace=namespace, label_selector="app=memcached" - )}}' -` + )}}'` const molecuTaskToCheckConfigMap = ` - name: Create ConfigMap that the Operator should delete diff --git a/testdata/ansible/memcached-operator/molecule/default/tasks/memcached_test.yml b/testdata/ansible/memcached-operator/molecule/default/tasks/memcached_test.yml index abf3823068c..b13cac0760a 100644 --- a/testdata/ansible/memcached-operator/molecule/default/tasks/memcached_test.yml +++ b/testdata/ansible/memcached-operator/molecule/default/tasks/memcached_test.yml @@ -17,6 +17,8 @@ status: "True" - name: Wait 2 minutes for memcached deployment + debug: + var: deploy until: - deploy is defined - deploy.status is defined From 52cf2a49d684eeb1c4972eabccc764e591460bdd Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Mon, 25 Apr 2022 12:40:27 -0400 Subject: [PATCH 3/5] change name so we dont have a half-made test Signed-off-by: Austin Macdonald --- ...izerconcurrency_test.yml => finalizerconcurrencytest_test.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename hack/generate/samples/internal/ansible/testdata/tasks/{finalizerconcurrency_test.yml => finalizerconcurrencytest_test.yml} (100%) diff --git a/hack/generate/samples/internal/ansible/testdata/tasks/finalizerconcurrency_test.yml b/hack/generate/samples/internal/ansible/testdata/tasks/finalizerconcurrencytest_test.yml similarity index 100% rename from hack/generate/samples/internal/ansible/testdata/tasks/finalizerconcurrency_test.yml rename to hack/generate/samples/internal/ansible/testdata/tasks/finalizerconcurrencytest_test.yml From 18936dbb52ba1bd8880137c6dea8596e94ee873f Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Thu, 28 Apr 2022 11:06:39 -0400 Subject: [PATCH 4/5] PR cleanup Signed-off-by: Austin Macdonald --- Makefile | 2 ++ hack/tests/e2e-ansible-molecule.sh | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 696b08ffd9b..2f5103c0e6e 100644 --- a/Makefile +++ b/Makefile @@ -158,9 +158,11 @@ export KIND_CLUSTER := osdk-test KUBEBUILDER_ASSETS = $(PWD)/$(shell go install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest && $(shell go env GOPATH)/bin/setup-envtest use $(ENVTEST_K8S_VERSION) --bin-dir tools/bin/ -p path) test-e2e-setup:: build dev-install cluster-create +.PHONY: cluster-create cluster-create:: [[ "`$(TOOLS_DIR)/kind get clusters`" =~ "$(KIND_CLUSTER)" ]] || $(TOOLS_DIR)/kind create cluster --image="kindest/node:v$(ENVTEST_K8S_VERSION)" --name $(KIND_CLUSTER) +.PHONY: dev-install dev-install:: $(SCRIPTS_DIR)/fetch kind 0.11.0 $(SCRIPTS_DIR)/fetch kubectl $(ENVTEST_K8S_VERSION) # Install kubectl AFTER envtest because envtest includes its own kubectl binary diff --git a/hack/tests/e2e-ansible-molecule.sh b/hack/tests/e2e-ansible-molecule.sh index b8f80059506..f2046b45567 100755 --- a/hack/tests/e2e-ansible-molecule.sh +++ b/hack/tests/e2e-ansible-molecule.sh @@ -17,8 +17,6 @@ set -eu header_text "Running ansible molecule tests in a python3 virtual environment" - - # Set up a python3.8 virtual environment. ENVDIR="$(mktemp -d)" trap_add "set +u; deactivate; set -u; rm -rf $ENVDIR" EXIT From 325517d1bf0abc28168eee4de82a23952469da13 Mon Sep 17 00:00:00 2001 From: Austin Macdonald Date: Thu, 28 Apr 2022 12:36:55 -0400 Subject: [PATCH 5/5] rm extra --- Signed-off-by: Austin Macdonald --- hack/generate/samples/internal/ansible/advanced_molecule.go | 1 - 1 file changed, 1 deletion(-) diff --git a/hack/generate/samples/internal/ansible/advanced_molecule.go b/hack/generate/samples/internal/ansible/advanced_molecule.go index 5d257c5330a..0cba602e8fe 100644 --- a/hack/generate/samples/internal/ansible/advanced_molecule.go +++ b/hack/generate/samples/internal/ansible/advanced_molecule.go @@ -522,7 +522,6 @@ func (ma *AdvancedMolecule) updatePlaybooks() { log.Infof("adding playbook for finalizerconcurrencytest") const finalizerConcurrencyTest = `--- ---- - hosts: localhost gather_facts: no collections: