Skip to content

Commit

Permalink
Fix ansible-operator finalizer concurrency issue
Browse files Browse the repository at this point in the history
For ansible-based operators, this change fixes an issue that caused
finalizers to fail to run if the watched resource (CR) is deleted during
reconciliation.

Fixes #4909

Signed-off-by: Austin Macdonald <[email protected]>
  • Loading branch information
asmacdo committed Apr 25, 2022
1 parent 95237b3 commit 5d3b1c3
Show file tree
Hide file tree
Showing 11 changed files with 167 additions and 16 deletions.
10 changes: 7 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -156,10 +156,14 @@ e2e_targets := test-e2e $(e2e_tests)
export KIND_CLUSTER := osdk-test

KUBEBUILDER_ASSETS = $(PWD)/$(shell go install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest && $(shell go env GOPATH)/bin/setup-envtest use $(ENVTEST_K8S_VERSION) --bin-dir tools/bin/ -p path)
test-e2e-setup: build
test-e2e-setup:: build dev-install cluster-create

cluster-create::
[[ "`$(TOOLS_DIR)/kind get clusters`" =~ "$(KIND_CLUSTER)" ]] || $(TOOLS_DIR)/kind create cluster --image="kindest/node:v$(ENVTEST_K8S_VERSION)" --name $(KIND_CLUSTER)

dev-install::
$(SCRIPTS_DIR)/fetch kind 0.11.0
$(SCRIPTS_DIR)/fetch kubectl $(ENVTEST_K8S_VERSION) # Install kubectl AFTER envtest because envtest includes its own kubectl binary
[[ "`$(TOOLS_DIR)/kind get clusters`" =~ "$(KIND_CLUSTER)" ]] || $(TOOLS_DIR)/kind create cluster --image="kindest/node:v$(ENVTEST_K8S_VERSION)" --name $(KIND_CLUSTER)

.PHONY: test-e2e-teardown
test-e2e-teardown:
Expand All @@ -177,7 +181,7 @@ test-e2e-go:: image/custom-scorecard-tests ## Run Go e2e tests
test-e2e-ansible:: image/ansible-operator ## Run Ansible e2e tests
go test -count=1 ./internal/ansible/proxy/...
go test ./test/e2e/ansible -v -ginkgo.v
test-e2e-ansible-molecule:: image/ansible-operator ## Run molecule-based Ansible e2e tests
test-e2e-ansible-molecule:: install dev-install image/ansible-operator ## Run molecule-based Ansible e2e tests
go run ./hack/generate/samples/molecule/generate.go
./hack/tests/e2e-ansible-molecule.sh
test-e2e-helm:: image/helm-operator ## Run Helm e2e tests
Expand Down
11 changes: 11 additions & 0 deletions changelog/fragments/finalizerconcurrency.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# entries is a list of entries to include in
# release notes and/or the migration guide
entries:
- description: >
For ansible-based operators, this change fixes an issue that caused
finalizers to fail to run if the watched resource (CR) is deleted during
reconciliation.
kind: "bugfix"
# Is this a breaking change?
breaking: false
40 changes: 38 additions & 2 deletions hack/generate/samples/internal/ansible/advanced_molecule.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,12 @@ func (ma *AdvancedMolecule) addMocksFromTestdata() {
cmd = exec.Command("cp", "-r", "../../../hack/generate/samples/internal/ansible/testdata/inventory/", filepath.Join(ma.ctx.Dir, "inventory/"))
_, err = ma.ctx.Run(cmd)
pkg.CheckError("adding inventory/", err)

log.Infof("adding finalizer for finalizerconcurrencytest")
cmd = exec.Command("cp", "../../../hack/generate/samples/internal/ansible/testdata/playbooks/finalizerconcurrencyfinalizer.yml", filepath.Join(ma.ctx.Dir, "playbooks/finalizerconcurrencyfinalizer.yml"))
_, err = ma.ctx.Run(cmd)
pkg.CheckError("adding finalizer for finalizerconccurencytest", err)

}

func (ma *AdvancedMolecule) updateDockerfile() {
Expand Down Expand Up @@ -456,8 +462,6 @@ func (ma *AdvancedMolecule) updatePlaybooks() {
command: '{{ exec_command }}'
register: exec_result
- debug: var=exec_result
- name: Get logs from busybox pod
k8s_log:
name: '{{ meta.name }}-busybox'
Expand Down Expand Up @@ -516,6 +520,37 @@ func (ma *AdvancedMolecule) updatePlaybooks() {
clusterAnnotationTest)
pkg.CheckError("adding playbook for clusterannotationtest", err)

log.Infof("adding playbook for finalizerconcurrencytest")
const finalizerConcurrencyTest = `---
---
- hosts: localhost
gather_facts: no
collections:
- kubernetes.core
- operator_sdk.util
tasks:
- debug:
msg: "Pausing until configmap exists"
- name: Wait for configmap
k8s_info:
apiVersion: v1
kind: ConfigMap
name: unpause-reconciliation
namespace: osdk-test
wait: yes
wait_sleep: 10
wait_timeout: 360
- debug:
msg: "Unpause!"
`
err = kbutil.ReplaceInFile(
filepath.Join(ma.ctx.Dir, "playbooks", "finalizerconcurrencytest.yml"),
originalPlaybookFragment,
finalizerConcurrencyTest)
pkg.CheckError("adding playbook for finalizerconcurrencytest", err)
}

func (ma *AdvancedMolecule) addPlaybooks() {
Expand All @@ -524,6 +559,7 @@ func (ma *AdvancedMolecule) addPlaybooks() {
"CaseTest",
"CollectionTest",
"ClusterAnnotationTest",
"FinalizerConcurrencyTest",
"ReconciliationTest",
"SelectorTest",
"SubresourcesTest",
Expand Down
8 changes: 2 additions & 6 deletions hack/generate/samples/internal/ansible/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,6 @@ const moleculeTaskFragment = `- name: Load CR
status: "True"
- name: Wait 2 minutes for memcached deployment
debug:
var: deploy
until:
- deploy is defined
- deploy.status is defined
Expand Down Expand Up @@ -400,8 +398,6 @@ const originaMemcachedMoleculeTask = `- name: Create the cache.example.com/v1alp
fail_msg: FIXME Add real assertions for your operator`

const targetMoleculeCheckDeployment = `- name: Wait 2 minutes for memcached deployment
debug:
var: deploy
until:
- deploy is defined
- deploy.status is defined
Expand All @@ -415,10 +411,10 @@ const targetMoleculeCheckDeployment = `- name: Wait 2 minutes for memcached depl
api_version="apps/v1",
namespace=namespace,
label_selector="app=memcached"
)}}'`
)}}'
`

const molecuTaskToCheckConfigMap = `
- name: Create ConfigMap that the Operator should delete
k8s:
definition:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
---
- hosts: localhost
gather_facts: no
collections:
- kubernetes.core
- operator_sdk.util

tasks:
- debug:
msg: "Pausing until configmap exists"

- name: Wait for configmap
k8s_info:
api_version: v1
kind: ConfigMap
name: finalizer-concurrency-results
namespace: osdk-test
wait: yes
wait_sleep: 10
wait_timeout: 30

- name: Update configmap
k8s:
state: present
force: yes
definition:
apiVersion: v1
kind: ConfigMap
metadata:
name: finalizer-concurrency-results
namespace: osdk-test
data:
finalizer: "success"
wait: yes
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
---
# TODO(asmacdo) this should be the only task. the other is getting magiced in
- name: Create the test.example.com/v1alpha1.FinalizerConcurrencyTest
k8s:
state: present
definition:
apiVersion: test.example.com/v1alpha1
kind: FinalizerConcurrencyTest
metadata:
name: finalizer-concurrency-test
namespace: '{{ namespace }}'
wait: no

- name: While reconcile is paused, delete the CR
k8s:
state: absent
definition:
apiVersion: test.example.com/v1alpha1
kind: FinalizerConcurrencyTest
metadata:
name: finalizer-concurrency-test
namespace: '{{ namespace }}'
wait: no

- name: Create a configmap to allow reconciliation to unpause
k8s:
state: present
definition:
apiVersion: v1
kind: ConfigMap
metadata:
name: finalizer-concurrency-results
namespace: osdk-test
wait: no

- name: Wait for the custom resource to be deleted
k8s_info:
api_version: test.example.com/v1alpha1
kind: FinalizerConcurrencyTest
namespace: osdk-test # TODO(asmacdo) Fixme
name: finalizer-concurrency-test
register: cr
retries: 10
delay: 6
until: not cr.resources
failed_when: cr.resources

- name: Retrive the cm
k8s_info:
api_version: v1
kind: ConfigMap
name: finalizer-concurrency-results
namespace: osdk-test
register: finalizer_test

- name: Assert that finalizer ran
assert:
that: finalizer_test.resources.0.data.finalizer== 'success'
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
status: "True"
register: subresources_test

- debug: var=subresources_test

- name: Assert stdout and stderr are properly set in status
assert:
that:
Expand Down
10 changes: 10 additions & 0 deletions hack/generate/samples/internal/ansible/testdata/watches.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,14 @@
watchClusterScopedResources: true
vars:
meta: '{{ ansible_operator_meta }}'

- version: v1alpha1
group: test.example.com
kind: FinalizerConcurrencyTest
playbook: playbooks/finalizerconcurrencytest.yml
finalizer:
name: test.example.com/finalizer
playbook: playbooks/finalizerconcurrencyfinalizer.yml
vars:
meta: '{{ ansible_operator_meta }}'
#+kubebuilder:scaffold:watch
2 changes: 2 additions & 0 deletions hack/tests/e2e-ansible-molecule.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ set -eu

header_text "Running ansible molecule tests in a python3 virtual environment"



# Set up a python3.8 virtual environment.
ENVDIR="$(mktemp -d)"
trap_add "set +u; deactivate; set -u; rm -rf $ENVDIR" EXIT
Expand Down
6 changes: 5 additions & 1 deletion internal/ansible/controller/reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,15 +243,19 @@ func (r *AnsibleOperatorReconciler) Reconcile(ctx context.Context, request recon
// and do it at the end
runSuccessful := len(failureMessages) == 0

recentlyDeleted := u.GetDeletionTimestamp() != nil

// The finalizer has run successfully, time to remove it
deleted = u.GetDeletionTimestamp() != nil
if deleted && finalizerExists && runSuccessful {
controllerutil.RemoveFinalizer(u, finalizer)
err := r.Client.Update(ctx, u)
if err != nil {
logger.Error(err, "Failed to remove finalizer")
return reconcileResult, err
}
} else if recentlyDeleted && finalizerExists {
// If the CR was deleted after the reconcile began, we need to requeue for the finalizer.
reconcileResult.Requeue = true
}
if r.ManageStatus {
errmark := r.markDone(ctx, request.NamespacedName, u, statusEvent, failureMessages)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
status: "True"

- name: Wait 2 minutes for memcached deployment
debug:
var: deploy
until:
- deploy is defined
- deploy.status is defined
Expand Down

0 comments on commit 5d3b1c3

Please sign in to comment.