From 607a7ca58ceb5fb3dcb7691357cd782d801066fb Mon Sep 17 00:00:00 2001 From: John Westcott IV <32551173+john-westcott-iv@users.noreply.github.com> Date: Thu, 29 Feb 2024 17:02:11 -0500 Subject: [PATCH] Upgrading to PostgreSQL 15 and moving to sclorg images (#1486) * Upgrading to postgres:15 * Changing image from postgres to sclorg * Handle scenario where upgrade status is not defined & correct pg tag * Rework the upgrade logic to be more resilient for multiple upgrades --------- Co-authored-by: john-westcott-iv Co-authored-by: Christian M. Adams --- CONTRIBUTING.md | 20 ++++--- config/manager/kustomization.yaml | 4 +- docs/upgrade/upgrading.md | 2 +- .../assigning-awx-pods-to-specific-nodes.md | 38 ++++++------- docs/user-guide/database-configuration.md | 20 +++---- molecule/default/tasks/awx_replicas_test.yml | 2 +- roles/backup/vars/main.yml | 6 +- roles/installer/defaults/main.yml | 4 +- .../tasks/database_configuration.yml | 56 +++++++++++++++---- roles/installer/tasks/update_status.yml | 2 +- roles/installer/tasks/upgrade_postgres.yml | 33 ++++++++--- .../templates/statefulsets/postgres.yaml.j2 | 2 +- roles/installer/vars/main.yml | 4 +- roles/restore/README.md | 2 +- roles/restore/vars/main.yml | 6 +- 15 files changed, 129 insertions(+), 72 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9134d7cfb..abd09b1fb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -6,13 +6,15 @@ Have questions about this document or anything not covered here? 
Please file a n ## Table of contents -* [Things to know prior to submitting code](#things-to-know-prior-to-submitting-code) -* [Submmiting your Work](#submitting-your-work) -* [Testing](#testing) - * [Testing in Docker](#testing-in-docker) - * [Testing in Minikube](#testing-in-minikube) -* [Generating a bundle](#generating-a-bundle) -* [Reporting Issues](#reporting-issues) +- [AWX-Operator Contributing Guidelines](#awx-operator-contributing-guidelines) + - [Table of contents](#table-of-contents) + - [Things to know prior to submitting code](#things-to-know-prior-to-submitting-code) + - [Submmiting your work](#submmiting-your-work) + - [Testing](#testing) + - [Testing in Kind](#testing-in-kind) + - [Testing in Minikube](#testing-in-minikube) + - [Generating a bundle](#generating-a-bundle) + - [Reporting Issues](#reporting-issues) ## Things to know prior to submitting code @@ -44,12 +46,12 @@ Have questions about this document or anything not covered here? Please file a n ## Testing -This Operator includes a [Molecule](https://molecule.readthedocs.io/en/stable/)-based test environment, which can be executed standalone in Docker (e.g. in CI or in a single Docker container anywhere), or inside any kind of Kubernetes cluster (e.g. Minikube). +This Operator includes a [Molecule](https://ansible.readthedocs.io/projects/molecule/)-based test environment, which can be executed standalone in Docker (e.g. in CI or in a single Docker container anywhere), or inside any kind of Kubernetes cluster (e.g. Minikube). You need to make sure you have Molecule installed before running the following commands. You can install Molecule with: ```sh -#> pip install 'molecule[docker]' +#> python -m pip install molecule-plugins[docker] ``` Running `molecule test` sets up a clean environment, builds the operator, runs all configured tests on an example operator instance, then tears down the environment (at least in the case of Docker). 
diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 995c877c8..0f2dccf3d 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -5,9 +5,9 @@ generatorOptions: disableNameSuffixHash: true configMapGenerator: -- name: awx-manager-config - files: +- files: - controller_manager_config.yaml + name: awx-manager-config apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization diff --git a/docs/upgrade/upgrading.md b/docs/upgrade/upgrading.md index e826caded..0bf61c81e 100644 --- a/docs/upgrade/upgrading.md +++ b/docs/upgrade/upgrading.md @@ -14,7 +14,7 @@ The first part of any upgrade should be a backup. Note, there are secrets in the In the event you need to recover the backup see the [restore role documentation](https://github.com/ansible/awx-operator/tree/devel/roles/restore). *Before Restoring from a backup*, be sure to: * delete the old existing AWX CR -* delete the persistent volume claim (PVC) for the database from the old deployment, which has a name like `postgres-13-<deployment-name>-postgres-13-0` +* delete the persistent volume claim (PVC) for the database from the old deployment, which has a name like `postgres-15-<deployment-name>-postgres-15-0` **Note**: Do not delete the namespace/project, as that will delete the backup and the backup's PVC as well. diff --git a/docs/user-guide/advanced-configuration/assigning-awx-pods-to-specific-nodes.md b/docs/user-guide/advanced-configuration/assigning-awx-pods-to-specific-nodes.md index 103acd8df..79f34208c 100644 --- a/docs/user-guide/advanced-configuration/assigning-awx-pods-to-specific-nodes.md +++ b/docs/user-guide/advanced-configuration/assigning-awx-pods-to-specific-nodes.md @@ -9,25 +9,25 @@ If you want to use affinity rules for your AWX pod you can use the `affinity` op If you want to constrain the web and task pods individually, you can do so by specificying the deployment type before the specific setting. 
For example, specifying `task_tolerations` will allow the AWX task pod to be scheduled onto nodes with matching taints. -| Name | Description | Default | -| -------------------------------- | ---------------------------------------- | ------- | -| postgres_image | Path of the image to pull | postgres | -| postgres_image_version | Image version to pull | 13 | -| node_selector | AWX pods' nodeSelector | '' | -| web_node_selector | AWX web pods' nodeSelector | '' | -| task_node_selector | AWX task pods' nodeSelector | '' | -| topology_spread_constraints | AWX pods' topologySpreadConstraints | '' | -| web_topology_spread_constraints | AWX web pods' topologySpreadConstraints | '' | -| task_topology_spread_constraints | AWX task pods' topologySpreadConstraints | '' | -| affinity | AWX pods' affinity rules | '' | -| web_affinity | AWX web pods' affinity rules | '' | -| task_affinity | AWX task pods' affinity rules | '' | -| tolerations | AWX pods' tolerations | '' | -| web_tolerations | AWX web pods' tolerations | '' | -| task_tolerations | AWX task pods' tolerations | '' | -| annotations | AWX pods' annotations | '' | -| postgres_selector | Postgres pods' nodeSelector | '' | -| postgres_tolerations | Postgres pods' tolerations | '' | +| Name | Description | Default | +| -------------------------------- | ---------------------------------------- | -------------------------------- | +| postgres_image | Path of the image to pull | quay.io/sclorg/postgresql-15-c9s | +| postgres_image_version | Image version to pull | latest | +| node_selector | AWX pods' nodeSelector | '' | +| web_node_selector | AWX web pods' nodeSelector | '' | +| task_node_selector | AWX task pods' nodeSelector | '' | +| topology_spread_constraints | AWX pods' topologySpreadConstraints | '' | +| web_topology_spread_constraints | AWX web pods' topologySpreadConstraints | '' | +| task_topology_spread_constraints | AWX task pods' topologySpreadConstraints | '' | +| affinity | AWX pods' affinity rules | '' | 
+| web_affinity | AWX web pods' affinity rules | '' | +| task_affinity | AWX task pods' affinity rules | '' | +| tolerations | AWX pods' tolerations | '' | +| web_tolerations | AWX web pods' tolerations | '' | +| task_tolerations | AWX task pods' tolerations | '' | +| annotations | AWX pods' annotations | '' | +| postgres_selector | Postgres pods' nodeSelector | '' | +| postgres_tolerations | Postgres pods' tolerations | '' | Example of customization could be: diff --git a/docs/user-guide/database-configuration.md b/docs/user-guide/database-configuration.md index 556e8fe44..901d56b15 100644 --- a/docs/user-guide/database-configuration.md +++ b/docs/user-guide/database-configuration.md @@ -2,7 +2,7 @@ #### Postgres Version -The default Postgres version for the version of AWX bundled with the latest version of the awx-operator is Postgres 13. You can find this default for a given version by at the default value for [_postgres_image_version](https://github.com/ansible/awx-operator/blob/devel/roles/installer/defaults/main.yml#L243). +The default Postgres version for the version of AWX bundled with the latest version of the awx-operator is Postgres 15. You can find this default for a given version by at the default value for [_postgres_image_version](https://github.com/ansible/awx-operator/blob/devel/roles/installer/defaults/main.yml#L243). We only have coverage for the default version of Postgres. Newer versions of Postgres (14+) will likely work, but should only be configured as an external database. If your database is managed by the awx-operator (default if you don't specify a `postgres_configuration_secret`), then you should not override the default version as this may cause issues when awx-operator tries to upgrade your postgresql pod. 
@@ -56,15 +56,15 @@ If you don't have access to an external PostgreSQL service, the AWX operator can The following variables are customizable for the managed PostgreSQL service -| Name | Description | Default | -| --------------------------------------------- | --------------------------------------------- | ---------------------------------- | -| postgres_image | Path of the image to pull | postgres:12 | -| postgres_init_container_resource_requirements | Database init container resource requirements | requests: {cpu: 10m, memory: 64Mi} | -| postgres_resource_requirements | PostgreSQL container resource requirements | requests: {cpu: 10m, memory: 64Mi} | -| postgres_storage_requirements | PostgreSQL container storage requirements | requests: {storage: 8Gi} | -| postgres_storage_class | PostgreSQL PV storage class | Empty string | -| postgres_data_path | PostgreSQL data path | `/var/lib/postgresql/data/pgdata` | -| postgres_priority_class | Priority class used for PostgreSQL pod | Empty string | +| Name | Description | Default | +| --------------------------------------------- | --------------------------------------------- | --------------------------------------- | +| postgres_image | Path of the image to pull | quay.io/sclorg/postgresql-15-c9s:latest | +| postgres_init_container_resource_requirements | Database init container resource requirements | requests: {cpu: 10m, memory: 64Mi} | +| postgres_resource_requirements | PostgreSQL container resource requirements | requests: {cpu: 10m, memory: 64Mi} | +| postgres_storage_requirements | PostgreSQL container storage requirements | requests: {storage: 8Gi} | +| postgres_storage_class | PostgreSQL PV storage class | Empty string | +| postgres_data_path | PostgreSQL data path | `/var/lib/postgresql/data/pgdata` | +| postgres_priority_class | Priority class used for PostgreSQL pod | Empty string | Example of customization could be: diff --git a/molecule/default/tasks/awx_replicas_test.yml 
b/molecule/default/tasks/awx_replicas_test.yml index 93afe7eca..14536d869 100644 --- a/molecule/default/tasks/awx_replicas_test.yml +++ b/molecule/default/tasks/awx_replicas_test.yml @@ -61,4 +61,4 @@ expected_web_replicas: 3 expected_task_replicas: 3 tags: - - replicas + - replicas diff --git a/roles/backup/vars/main.yml b/roles/backup/vars/main.yml index 04872867c..b4957c476 100644 --- a/roles/backup/vars/main.yml +++ b/roles/backup/vars/main.yml @@ -1,8 +1,8 @@ --- deployment_type: "awx" -_postgres_image: postgres -_postgres_image_version: 13 +_postgres_image: quay.io/sclorg/postgresql-15-c9s +_postgres_image_version: latest backup_complete: false database_type: "unmanaged" -supported_pg_version: 13 +supported_pg_version: 15 image_pull_policy: IfNotPresent diff --git a/roles/installer/defaults/main.yml b/roles/installer/defaults/main.yml index b80d332ad..0b3293d20 100644 --- a/roles/installer/defaults/main.yml +++ b/roles/installer/defaults/main.yml @@ -255,8 +255,8 @@ _image: quay.io/ansible/awx _image_version: "{{ lookup('env', 'DEFAULT_AWX_VERSION') or 'latest' }}" _redis_image: docker.io/redis _redis_image_version: 7 -_postgres_image: postgres -_postgres_image_version: 13 +_postgres_image: quay.io/sclorg/postgresql-15-c9s +_postgres_image_version: latest image_pull_policy: IfNotPresent image_pull_secrets: [] diff --git a/roles/installer/tasks/database_configuration.yml b/roles/installer/tasks/database_configuration.yml index 66a6d7d7a..48bad24ab 100644 --- a/roles/installer/tasks/database_configuration.yml +++ b/roles/installer/tasks/database_configuration.yml @@ -106,14 +106,38 @@ set_fact: managed_database: "{{ pg_config['resources'][0]['data']['type'] | default('') | b64decode == 'managed' }}" -- name: Get the old postgres pod information +# It is possible that N-2 postgres pods may still be present in the namespace from previous upgrades. +# So we have to take that into account and preferentially set the most recent one. 
+- name: Get the old postgres pod (N-1) k8s_info: kind: Pod namespace: "{{ ansible_operator_meta.namespace }}" - name: "{{ ansible_operator_meta.name }}-postgres-0" field_selectors: - status.phase=Running - register: old_postgres_pod + register: _running_pods + +- block: + - name: Filter pods by name + set_fact: + filtered_old_postgres_pods: "{{ _running_pods.resources | + selectattr('metadata.name', 'match', ansible_operator_meta.name + '-postgres.*-0') | + rejectattr('metadata.name', 'search', '-' + supported_pg_version | string + '-0') | + list }}" + + # Sort pods by name in reverse order (most recent PG version first) and set + - name: Set info for previous postgres pod + set_fact: + sorted_old_postgres_pods: "{{ filtered_old_postgres_pods | + sort(attribute='metadata.name') | + reverse }}" + when: filtered_old_postgres_pods | length + + + - name: Set info for previous postgres pod + set_fact: + old_postgres_pod: "{{ sorted_old_postgres_pods | first }}" + when: filtered_old_postgres_pods | length + when: _running_pods.resources | length - name: Look up details for this deployment k8s_info: @@ -123,7 +147,14 @@ namespace: "{{ ansible_operator_meta.namespace }}" register: this_awx -- name: Check if postgres pod is running and version 12 +# If this deployment has been upgraded before or if upgrade has already been started, set this var +- name: Set previous PG version var + set_fact: + _previous_upgraded_pg_version: "{{ this_awx['resources'][0]['status']['upgradedPostgresVersion'] | default(false) }}" + when: + - "'upgradedPostgresVersion' in this_awx['resources'][0]['status']" + +- name: Check if postgres pod is running an older version block: - name: Set path to PG_VERSION file for given container image set_fact: @@ -132,21 +163,24 @@ - name: Get old PostgreSQL version k8s_exec: namespace: "{{ ansible_operator_meta.namespace }}" - pod: "{{ ansible_operator_meta.name }}-postgres-0" + pod: "{{ old_postgres_pod['metadata']['name'] }}" command: | bash -c """ cat {{ 
path_to_pg_version }} """ register: _old_pg_version - - name: Upgrade data dir from Postgres 12 to 13 if applicable + - debug: + msg: "--- Upgrading from {{ old_postgres_pod['metadata']['name'] | default('NONE')}} Pod ---" + + - name: Upgrade data dir from old Postgres to {{ supported_pg_version }} if applicable include_tasks: upgrade_postgres.yml when: - - _old_pg_version.stdout | default('0') | trim == '12' + - (_old_pg_version.stdout | default(0) | int ) < supported_pg_version when: - managed_database - - this_awx['resources'][0]['status']['upgradedPostgresVersion'] | default('none') != '12' - - old_postgres_pod['resources'] | length # upgrade is complete and old pg pod has been removed + - (_previous_upgraded_pg_version | default(false)) | ternary(_previous_upgraded_pg_version < supported_pg_version, true) + - old_postgres_pod | length # If empty, then old pg pod has been removed and we can assume the upgrade is complete - block: - name: Create Database if no database is specified @@ -167,7 +201,7 @@ kubernetes.core.k8s_scale: api_version: apps/v1 kind: StatefulSet - name: "{{ ansible_operator_meta.name }}-postgres-13" + name: "{{ ansible_operator_meta.name }}-postgres-{{ supported_pg_version }}" namespace: "{{ ansible_operator_meta.namespace }}" replicas: 0 wait: yes @@ -177,7 +211,7 @@ state: absent api_version: apps/v1 kind: StatefulSet - name: "{{ ansible_operator_meta.name }}-postgres-13" + name: "{{ ansible_operator_meta.name }}-postgres-{{ supported_pg_version }}" namespace: "{{ ansible_operator_meta.namespace }}" wait: yes when: create_statefulset_result.error == 422 diff --git a/roles/installer/tasks/update_status.yml b/roles/installer/tasks/update_status.yml index 9f59b3644..a693ae528 100644 --- a/roles/installer/tasks/update_status.yml +++ b/roles/installer/tasks/update_status.yml @@ -111,5 +111,5 @@ name: "{{ ansible_operator_meta.name }}" namespace: "{{ ansible_operator_meta.namespace }}" status: - upgradedPostgresVersion: "{{ 
upgraded_postgres_version }}" + upgradedPostgresVersion: "{{ upgraded_postgres_version | string }}" when: upgraded_postgres_version is defined diff --git a/roles/installer/tasks/upgrade_postgres.yml b/roles/installer/tasks/upgrade_postgres.yml index 7554769ef..ff3618a84 100644 --- a/roles/installer/tasks/upgrade_postgres.yml +++ b/roles/installer/tasks/upgrade_postgres.yml @@ -1,9 +1,9 @@ --- # Upgrade Posgres (Managed Databases only) -# * If postgres version is not 12, and not an external postgres instance (when managed_database is yes), +# * If postgres version is not supported_pg_version, and not an external postgres instance (when managed_database is yes), # then run this playbook with include_tasks from database_configuration.yml -# * Data will be streamed via a pg_dump from the postgres 12 pod to the postgres 13 +# * Data will be streamed via a pg_dump from the postgres 12/13 pod to the postgres supported_pg_version # pod via a pg_restore. @@ -62,9 +62,19 @@ set_fact: postgres_pod_name: "{{ postgres_pod['resources'][0]['metadata']['name'] }}" +- name: Get the name of the service for the old postgres pod + k8s_info: + kind: Service + namespace: "{{ ansible_operator_meta.namespace }}" + label_selectors: + - "app.kubernetes.io/component=database" + - "app.kubernetes.io/instance={{ old_postgres_pod.metadata.labels['app.kubernetes.io/instance'] }}" + - "app.kubernetes.io/managed-by=awx-operator" + register: old_postgres_svc + - name: Set full resolvable host name for postgres pod set_fact: - resolvable_db_host: "{{ ansible_operator_meta.name }}-postgres.{{ ansible_operator_meta.namespace }}.svc" # yamllint disable-line rule:line-length + resolvable_db_host: "{{ old_postgres_svc['resources'][0]['metadata']['name'] }}.{{ ansible_operator_meta.namespace }}.svc" # yamllint disable-line rule:line-length no_log: "{{ no_log }}" - name: Set pg_dump command @@ -118,7 +128,7 @@ - name: Set flag signifying that this instance has been migrated set_fact: - 
upgraded_postgres_version: '13' + upgraded_postgres_version: '{{ supported_pg_version }}' # Cleanup old Postgres resources - name: Remove old Postgres StatefulSet @@ -126,23 +136,32 @@ kind: StatefulSet api_version: v1 namespace: "{{ ansible_operator_meta.namespace }}" - name: "{{ ansible_operator_meta.name }}-postgres" + name: "{{ item }}" state: absent wait: true + loop: + - "{{ ansible_operator_meta.name }}-postgres" + - "{{ ansible_operator_meta.name }}-postgres-13" - name: Remove old Postgres Service k8s: kind: Service api_version: v1 namespace: "{{ ansible_operator_meta.namespace }}" - name: "{{ ansible_operator_meta.name }}-postgres" + name: "{{ item }}" state: absent + loop: + - "{{ ansible_operator_meta.name }}-postgres" + - "{{ ansible_operator_meta.name }}-postgres-13" - name: Remove old persistent volume claim k8s: kind: PersistentVolumeClaim api_version: v1 namespace: "{{ ansible_operator_meta.namespace }}" - name: "postgres-{{ ansible_operator_meta.name }}-postgres-0" + name: "{{ item }}" state: absent + loop: + - "postgres-{{ ansible_operator_meta.name }}-postgres-0" + - "postgres-{{ ansible_operator_meta.name }}-postgres-13-0" when: postgres_keep_pvc_after_upgrade diff --git a/roles/installer/templates/statefulsets/postgres.yaml.j2 b/roles/installer/templates/statefulsets/postgres.yaml.j2 index ce4e21414..754222116 100644 --- a/roles/installer/templates/statefulsets/postgres.yaml.j2 +++ b/roles/installer/templates/statefulsets/postgres.yaml.j2 @@ -59,7 +59,7 @@ spec: args: {{ postgres_extra_args }} {% endif %} env: - # For postgres_image based on rhel8/postgresql-13 + # For postgres_image based on rhel8/postgresql-{{ supported_pg_version }} - name: POSTGRESQL_DATABASE valueFrom: secretKeyRef: diff --git a/roles/installer/vars/main.yml b/roles/installer/vars/main.yml index 8c341b2c1..01fc74833 100644 --- a/roles/installer/vars/main.yml +++ b/roles/installer/vars/main.yml @@ -4,4 +4,6 @@ postgres_host_auth_method: 'scram-sha-256' ldap_cacert_ca_crt: 
'' bundle_ca_crt: '' projects_existing_claim: '' -supported_pg_version: 13 +supported_pg_version: 15 +_previous_upgraded_pg_version: 0 +old_postgres_pod: [] diff --git a/roles/restore/README.md b/roles/restore/README.md index aaf94ea4a..2676d90c9 100644 --- a/roles/restore/README.md +++ b/roles/restore/README.md @@ -19,7 +19,7 @@ This role assumes you are authenticated with an Openshift or Kubernetes cluster: *Before Restoring from a backup*, be sure to: - delete the old existing AWX CR - - delete the persistent volume claim (PVC) for the database from the old deployment, which has a name like `postgres-13-<deployment-name>-postgres-13-0` + - delete the persistent volume claim (PVC) for the database from the old deployment, which has a name like `postgres-<postgres version>-<deployment-name>-postgres-<postgres version>-0` **Note**: Do not delete the namespace/project, as that will delete the backup and the backup's PVC as well. diff --git a/roles/restore/vars/main.yml b/roles/restore/vars/main.yml index 2495549f7..906627173 100644 --- a/roles/restore/vars/main.yml +++ b/roles/restore/vars/main.yml @@ -1,8 +1,8 @@ --- deployment_type: "awx" -_postgres_image: postgres -_postgres_image_version: 13 +_postgres_image: quay.io/sclorg/postgresql-15-c9s +_postgres_image_version: latest backup_api_version: '{{ deployment_type }}.ansible.com/v1beta1' backup_kind: 'AWXBackup' @@ -12,7 +12,7 @@ secret_key_secret: '{{ deployment_name }}-secret-key' admin_password_secret: '{{ deployment_name }}-admin-password' broadcast_websocket_secret: '{{ deployment_name }}-broadcast-websocket' postgres_configuration_secret: '{{ deployment_name }}-postgres-configuration' -supported_pg_version: 13 +supported_pg_version: 15 image_pull_policy: IfNotPresent # If set to true, the restore process will delete the existing database and create a new one