From a8d1bbffe1e8a019d7582dd9cc265eed5b87f3df Mon Sep 17 00:00:00 2001 From: "Christian M. Adams" Date: Thu, 7 Mar 2024 17:22:19 -0500 Subject: [PATCH 01/27] Upgrading to PostgreSQL 15 and moving to sclorg images --- config/manager/manager.yaml | 2 +- ...galaxy-operator.clusterserviceversion.yaml | 2 +- ...alaxy_v1beta1_galaxybackup_cr.default.yaml | 1 - roles/backup/defaults/main.yml | 2 - roles/backup/tasks/postgres.yml | 2 +- roles/backup/templates/secrets.yaml.j2 | 2 +- roles/backup/vars/main.yml | 4 +- roles/common/defaults/main.yml | 3 +- .../common/templates/postgres.secret.yaml.j2 | 2 +- roles/common/vars/main.yml | 1 + roles/galaxy-status/tasks/main.yml | 2 +- roles/postgres/defaults/main.yml | 1 - roles/postgres/tasks/main.yml | 68 ++++++++++++++----- roles/postgres/tasks/migrate_data.yml | 2 +- roles/postgres/tasks/upgrade_postgres.yml | 64 ++++++++++++----- roles/postgres/templates/postgres.yaml.j2 | 18 ++--- .../templates/postgres_upgrade.secret.yaml.j2 | 2 +- roles/postgres/vars/main.yml | 4 ++ roles/redis/defaults/main.yml | 2 +- roles/restore/tasks/postgres.yml | 2 +- roles/restore/tasks/secrets.yml | 2 +- roles/restore/vars/main.yml | 3 +- 22 files changed, 128 insertions(+), 63 deletions(-) create mode 100644 roles/postgres/vars/main.yml diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index b9c97986..283ec7ed 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -61,7 +61,7 @@ spec: - name: RELATED_IMAGE_GALAXY_REDIS value: redis:latest - name: RELATED_IMAGE_GALAXY_POSTGRES - value: postgres:13 + value: quay.io/sclorg/postgresql-15-c9s:latest - name: RELATED_IMAGE_GALAXY_INIT_GPG_CONTAINER value: quay.io/centos/centos:stream9 securityContext: diff --git a/config/manifests/bases/galaxy-operator.clusterserviceversion.yaml b/config/manifests/bases/galaxy-operator.clusterserviceversion.yaml index 7565b772..0d4ed750 100644 --- a/config/manifests/bases/galaxy-operator.clusterserviceversion.yaml +++ 
b/config/manifests/bases/galaxy-operator.clusterserviceversion.yaml @@ -965,7 +965,7 @@ spec: - name: RELATED_IMAGE_GALAXY_REDIS value: redis:latest - name: RELATED_IMAGE_GALAXY_POSTGRES - value: postgres:13 + value: quay.io/sclorg/postgresql-15-c9s:latest - name: RELATED_IMAGE_GALAXY_INIT_GPG_CONTAINER value: quay.io/centos/centos:stream9 image: quay.io/ansible/galaxy-operator:main diff --git a/config/samples/galaxy_v1beta1_galaxybackup_cr.default.yaml b/config/samples/galaxy_v1beta1_galaxybackup_cr.default.yaml index dcf51f4a..7eaf1aa6 100644 --- a/config/samples/galaxy_v1beta1_galaxybackup_cr.default.yaml +++ b/config/samples/galaxy_v1beta1_galaxybackup_cr.default.yaml @@ -9,4 +9,3 @@ spec: backup_pvc: '' backup_storage_requirements: '' backup_storage_class: '' - postgres_label_selector: '' diff --git a/roles/backup/defaults/main.yml b/roles/backup/defaults/main.yml index a3ab321a..aa2e51ca 100644 --- a/roles/backup/defaults/main.yml +++ b/roles/backup/defaults/main.yml @@ -12,8 +12,6 @@ backup_storage_requirements: '' # Specify storage class to determine how to dynamically create PVC's with backup_storage_class: '' -postgres_version: 13 - # Secret Names admin_password_secret: "{{ deployment_name }}-admin-password" postgres_configuration_secret: "{{ deployment_name }}-postgres-configuration" diff --git a/roles/backup/tasks/postgres.yml b/roles/backup/tasks/postgres.yml index 55671bfa..7a03bb54 100644 --- a/roles/backup/tasks/postgres.yml +++ b/roles/backup/tasks/postgres.yml @@ -44,7 +44,7 @@ namespace: '{{ ansible_operator_meta.namespace }}' label_selectors: - "{{ postgres_label_selector }}" - - "app.kubernetes.io/version={{ postgres_version }}" + - "app.kubernetes.io/version={{ supported_pg_version }}" register: postgres_pod until: "postgres_pod['resources'][0]['status']['phase'] == 'Running'" delay: 5 diff --git a/roles/backup/templates/secrets.yaml.j2 b/roles/backup/templates/secrets.yaml.j2 index e8defbc5..2f254ee6 100644 --- 
a/roles/backup/templates/secrets.yaml.j2 +++ b/roles/backup/templates/secrets.yaml.j2 @@ -8,7 +8,7 @@ database_port: {{ database_port }} database_host: {{ database_host }} database_type: {{ database_type }} database_sslmode: {{ postgres_sslmode }} -postgres_version: {{ postgres_version }} +postgres_version: {{ supported_pg_version }} {% if database_type == 'unmanaged' %} db_secret_name: {{ postgres_configuration_secret }} {% endif %} diff --git a/roles/backup/vars/main.yml b/roles/backup/vars/main.yml index 7fc1b4d6..4f11ce13 100644 --- a/roles/backup/vars/main.yml +++ b/roles/backup/vars/main.yml @@ -1,3 +1,5 @@ --- deployment_type: "galaxy" -_postgres_image: postgres:13 +_postgres_image: quay.io/sclorg/postgresql-15-c9s:latest + +supported_pg_version: 15 diff --git a/roles/common/defaults/main.yml b/roles/common/defaults/main.yml index e760fdc3..2eb3a8f3 100644 --- a/roles/common/defaults/main.yml +++ b/roles/common/defaults/main.yml @@ -93,8 +93,7 @@ default_azure_settings: DEFAULT_FILE_STORAGE: "storages.backends.azure_storage.AzureStorage" # postgres_configuration.yml -postgres_version: 13 -_postgres_image: postgres:13 +_postgres_image: quay.io/sclorg/postgresql-15-c9s:latest # Secret to lookup that provide the PostgreSQL configuration postgres_configuration_secret: '{{ ansible_operator_meta.name }}-postgres-configuration' diff --git a/roles/common/templates/postgres.secret.yaml.j2 b/roles/common/templates/postgres.secret.yaml.j2 index f5f30f5d..9eb706b6 100644 --- a/roles/common/templates/postgres.secret.yaml.j2 +++ b/roles/common/templates/postgres.secret.yaml.j2 @@ -10,6 +10,6 @@ stringData: username: '{{ deployment_type }}' database: '{{ deployment_type }}' port: '5432' - host: {{ ansible_operator_meta.name }}-postgres-{{ postgres_version }} + host: {{ ansible_operator_meta.name }}-postgres-{{ supported_pg_version }} sslmode: {{ postgres_sslmode | default('prefer') }} type: 'managed' diff --git a/roles/common/vars/main.yml b/roles/common/vars/main.yml 
index 6070c90e..9036e0d5 100644 --- a/roles/common/vars/main.yml +++ b/roles/common/vars/main.yml @@ -1,2 +1,3 @@ --- _entrypoint_dir: /venv/bin +supported_pg_version: 15 diff --git a/roles/galaxy-status/tasks/main.yml b/roles/galaxy-status/tasks/main.yml index 1a3b71a9..6c0e8ed0 100644 --- a/roles/galaxy-status/tasks/main.yml +++ b/roles/galaxy-status/tasks/main.yml @@ -25,7 +25,7 @@ name: "{{ ansible_operator_meta.name }}" namespace: "{{ ansible_operator_meta.namespace }}" status: - upgradedPostgresVersion: "{{ upgraded_postgres_version }}" + upgradedPostgresVersion: "{{ upgraded_postgres_version | string }}" when: - upgraded_postgres_version is defined - upgraded_postgres_version | length diff --git a/roles/postgres/defaults/main.yml b/roles/postgres/defaults/main.yml index eeb5af00..93f7dd2e 100644 --- a/roles/postgres/defaults/main.yml +++ b/roles/postgres/defaults/main.yml @@ -1,5 +1,4 @@ --- -postgres_version: 13 _postgres_image: postgres postgres_storage_requirements: diff --git a/roles/postgres/tasks/main.yml b/roles/postgres/tasks/main.yml index 4b4102c3..1a2baeff 100644 --- a/roles/postgres/tasks/main.yml +++ b/roles/postgres/tasks/main.yml @@ -1,6 +1,6 @@ --- -- k8s_status: +- operator_sdk.util.k8s_status: api_version: "{{ api_version }}" kind: "{{ kind }}" name: "{{ ansible_operator_meta.name }}" @@ -31,19 +31,42 @@ postgres_label_selector: "app.kubernetes.io/instance=postgres-{{ ansible_operator_meta.name }}" when: postgres_label_selector is not defined -- name: Get the old postgres pod information - k8s_info: +# It is possible that N-2 postgres pods may still be present in the namespace from previous upgrades. +# So we have to take that into account and preferentially set the most recent one. 
+- name: Get the old postgres pod (N-1) + kubernetes.core.k8s_info: kind: Pod namespace: "{{ ansible_operator_meta.namespace }}" label_selectors: - "{{ postgres_label_selector }}" - - "app.kubernetes.io/version=12" field_selectors: - status.phase=Running - register: old_postgres_pod + register: _running_pods - block: - - k8s_status: + - name: Filter pods by name + set_fact: + filtered_old_postgres_pods: "{{ _running_pods.resources | + selectattr('metadata.name', 'match', ansible_operator_meta.name + '-postgres.*-0') | + rejectattr('metadata.name', 'search', '-' + supported_pg_version | string + '-0') | + list }}" + + # Sort pods by name in reverse order (most recent PG version first) and set + - name: Set info for previous postgres pod + set_fact: + sorted_old_postgres_pods: "{{ filtered_old_postgres_pods | + sort(attribute='metadata.name') | + reverse }}" + when: filtered_old_postgres_pods | length + + - name: Set info for previous postgres pod + set_fact: + old_postgres_pod: "{{ sorted_old_postgres_pods | first }}" + when: filtered_old_postgres_pods | length + when: _running_pods.resources | length + +- block: + - operator_sdk.util.k8s_status: api_version: "{{ api_version }}" kind: "{{ kind }}" name: "{{ ansible_operator_meta.name }}" @@ -59,17 +82,23 @@ k8s: apply: true definition: "{{ lookup('template', 'postgres.yaml.j2') }}" - when: managed_database - name: Look up details for this deployment - k8s_info: + kubernetes.core.k8s_info: api_version: "{{ api_version }}" kind: "{{ kind }}" name: "{{ ansible_operator_meta.name }}" namespace: "{{ ansible_operator_meta.namespace }}" register: this_galaxy +# If this deployment has been upgraded before or if upgrade has already been started, set this var +- name: Set previous PG version var + set_fact: + _previous_upgraded_pg_version: "{{ this_galaxy['resources'][0]['status']['upgradedPostgresVersion'] | default(false) }}" + when: + - "'upgradedPostgresVersion' in this_galaxy['resources'][0]['status']" + - name: Check if 
postgres pod is running and version 12 block: - name: Set path to PG_VERSION file for given container image @@ -77,23 +106,26 @@ path_to_pg_version: '{{ postgres_data_path }}/PG_VERSION' - name: Get old PostgreSQL version - k8s_exec: + kubernetes.core.k8s_exec: namespace: "{{ ansible_operator_meta.namespace }}" - pod: "{{ old_postgres_pod['resources'][0]['metadata']['name'] }}" + pod: "{{ old_postgres_pod['metadata']['name'] }}" command: | bash -c """ cat {{ path_to_pg_version }} """ register: _old_pg_version - - name: Upgrade data dir from Postgres 12 to 13 if applicable + - debug: + msg: "--- Upgrading from {{ old_postgres_pod['metadata']['name'] | default('NONE')}} Pod ---" + + - name: Upgrade data dir from old Postgres to {{ supported_pg_version }} if applicable include_tasks: upgrade_postgres.yml when: - - _old_pg_version.stdout | default('0') | trim == '12' + - (_old_pg_version.stdout | default(0) | int ) < supported_pg_version when: - managed_database - - this_galaxy['resources'][0]['status']['upgradedPostgresVersion'] | default('none') != '12' - - old_postgres_pod['resources'] | length # upgrade is complete and old pg pod has been removed + - (_previous_upgraded_pg_version | default(false)) | ternary(_previous_upgraded_pg_version | int < supported_pg_version, true) + - old_postgres_pod | length # If empty, then old pg pod has been removed and we can assume the upgrade is complete - name: Migrate data from old Openshift instance import_tasks: migrate_data.yml @@ -103,16 +135,16 @@ - not database_status_present - name: Check PostgreSQL status - k8s_info: + kubernetes.core.k8s_info: api_version: v1 kind: StatefulSet - namespace: '{{ ansible_operator_meta.namespace }}' - name: '{{ ansible_operator_meta.name }}-postgres-{{ postgres_version }}' + namespace: "{{ ansible_operator_meta.namespace }}" + name: "{{ ansible_operator_meta.name }}-postgres-{{ supported_pg_version }}" register: _pg_sts_status # This status will probably not reflect the real state during the 
first playbook execution # I thought to put a wait_for here, but it would just delay the execution of the other tasks -- k8s_status: +- operator_sdk.util.k8s_status: api_version: "{{ api_version }}" kind: "{{ kind }}" name: "{{ ansible_operator_meta.name }}" diff --git a/roles/postgres/tasks/migrate_data.yml b/roles/postgres/tasks/migrate_data.yml index 4d402fcd..7e8ea166 100644 --- a/roles/postgres/tasks/migrate_data.yml +++ b/roles/postgres/tasks/migrate_data.yml @@ -21,7 +21,7 @@ namespace: '{{ ansible_operator_meta.namespace }}' label_selectors: - "{{ postgres_label_selector }}" - - "app.kubernetes.io/version={{ postgres_version }}" + - "app.kubernetes.io/version={{ supported_pg_version }}" register: postgres_pod until: "postgres_pod['resources'][0]['status']['phase'] == 'Running'" delay: 5 diff --git a/roles/postgres/tasks/upgrade_postgres.yml b/roles/postgres/tasks/upgrade_postgres.yml index 749a0bb1..f10680cc 100644 --- a/roles/postgres/tasks/upgrade_postgres.yml +++ b/roles/postgres/tasks/upgrade_postgres.yml @@ -1,10 +1,9 @@ --- # Upgrade Postgres (Managed Databases only) -# * If postgres version is not 12, and not an external postgres instance (when managed_database is yes), +# * If postgres version is not supported_pg_version, and not an external postgres instance (when managed_database is yes), # then run this playbook with include_tasks from database_configuration.yml -# * Data will be streamed via a pg_dump from the postgres 12 pod to the postgres 13 -# pod via a pg_restore. 
+# * Data will be streamed via a pg_dump from the postgres 12/13 pod to the postgres supported_pg_version - k8s_status: api_version: "{{ api_version }}" @@ -37,12 +36,12 @@ namespace: "{{ ansible_operator_meta.namespace }}" conditions: - type: Database-Ready - message: "Creating the new database configuration for postgres-{{ postgres_version }}" - reason: "CreatingNewSecretForPostgres{{ postgres_version }}" + message: "Creating the new database configuration for postgres-{{ supported_pg_version }}" + reason: "CreatingNewSecretForPostgres{{ supported_pg_version }}" status: "False" lastTransitionTime: "{{ lookup('pipe', 'date --iso-8601=seconds') }}" -- name: Create Database configuration with new -postgres-{{ postgres_version }} hostname +- name: Create Database configuration with new -postgres-{{ supported_pg_version }} hostname k8s: apply: true definition: "{{ lookup('template', 'postgres_upgrade.secret.yaml.j2') }}" @@ -50,7 +49,7 @@ - name: Set new database var to be used when configuring app credentials (resources_configuration.yml) set_fact: - postgres_host: "{{ ansible_operator_meta.name }}-postgres-{{ postgres_version }}" + postgres_host: "{{ ansible_operator_meta.name }}-postgres-{{ supported_pg_version }}" no_log: "{{ no_log }}" - k8s_status: @@ -74,7 +73,7 @@ - name: Set postgres label if not defined by user set_fact: - postgres_label_selector: "app.kubernetes.io/instance=postgres-{{ postgres_version }}-{{ ansible_operator_meta.name }}" + postgres_label_selector: "app.kubernetes.io/instance=postgres-{{ ansible_operator_meta.name }}" when: postgres_label_selector is not defined - block: @@ -84,7 +83,7 @@ namespace: "{{ ansible_operator_meta.namespace }}" label_selectors: - "{{ postgres_label_selector }}" - - "app.kubernetes.io/version={{ postgres_version }}" + - "app.kubernetes.io/version={{ supported_pg_version }}" field_selectors: - status.phase=Running register: postgres_pod @@ -97,7 +96,7 @@ rescue: - name: Set error message set_fact: - error_msg: "A 
Postgres {{ postgres_version }} Pod with the {{ postgres_label_selector }} label \ + error_msg: "A Postgres {{ supported_pg_version }} Pod with the {{ postgres_label_selector }} label \ is not available and in the Running state" - k8s_status: @@ -123,12 +122,34 @@ set_fact: postgres_pod_name: "{{ postgres_pod['resources'][0]['metadata']['name'] }}" +- name: Get the name of the service for the old postgres pod + k8s_info: + kind: Service + namespace: "{{ ansible_operator_meta.namespace }}" + label_selectors: + - "app.kubernetes.io/component=database" + - "app.kubernetes.io/instance=postgres-{{ ansible_operator_meta.name }}" + - "app.kubernetes.io/version={{ supported_pg_version }}" + - "app.kubernetes.io/managed-by={{ deployment_type }}-operator" + register: old_postgres_svc + +# TODO: The galaxy-operator labels are different. potential issue here. +- name: Get the name of the service for the old postgres pod + k8s_info: + kind: Service + namespace: "{{ ansible_operator_meta.namespace }}" + label_selectors: + - "app.kubernetes.io/component=database" + - "app.kubernetes.io/instance={{ old_postgres_pod.metadata.labels['app.kubernetes.io/instance'] }}" + - "app.kubernetes.io/version={{ old_postgres_pod.metadata.labels['app.kubernetes.io/version'] }}" # unique to the galaxy-operator + - "app.kubernetes.io/managed-by={{ deployment_type }}-operator" + register: old_postgres_svc + - name: Set full resolvable host name for old postgres pod set_fact: - resolvable_db_host: "{{ ansible_operator_meta.name }}-postgres-svc.{{ ansible_operator_meta.namespace }}.svc" # yamllint disable-line rule:line-length + resolvable_db_host: "{{ old_postgres_svc['resources'][0]['metadata']['name'] }}.{{ ansible_operator_meta.namespace }}.svc" # yamllint disable-line rule:line-length no_log: "{{ no_log }}" - - k8s_status: api_version: "{{ api_version }}" kind: "{{ kind }}" @@ -199,7 +220,7 @@ } keepalive_file=\"$(mktemp)\" while [[ -f \"$keepalive_file\" ]]; do - echo 'Migrating data to new 
PostgreSQL {{ supported_postgres_version }} Database...' + echo 'Migrating data to new PostgreSQL {{ supported_pg_version }} Database...' sleep 60 done & keepalive_pid=$! @@ -216,7 +237,7 @@ - name: Set flag signifying that this instance has been migrated set_fact: - upgraded_postgres_version: '13' + upgraded_postgres_version: '{{ supported_pg_version }}' - k8s_status: api_version: "{{ api_version }}" @@ -236,9 +257,12 @@ kind: StatefulSet api_version: v1 namespace: "{{ ansible_operator_meta.namespace }}" - name: "{{ ansible_operator_meta.name }}-postgres" + name: "{{ item }}" state: absent wait: true + loop: + - "{{ ansible_operator_meta.name }}-postgres" + - "{{ ansible_operator_meta.name }}-postgres-13" - k8s_status: api_version: "{{ api_version }}" @@ -257,8 +281,11 @@ kind: Service api_version: v1 namespace: "{{ ansible_operator_meta.namespace }}" - name: "{{ ansible_operator_meta.name }}-postgres-svc" + name: "{{ item }}" state: absent + loop: + - "{{ ansible_operator_meta.name }}-postgres" + - "{{ ansible_operator_meta.name }}-postgres-13" - k8s_status: api_version: "{{ api_version }}" @@ -277,8 +304,11 @@ kind: PersistentVolumeClaim api_version: v1 namespace: "{{ ansible_operator_meta.namespace }}" - name: "postgres-{{ ansible_operator_meta.name }}-postgres-0" + name: "{{ item }}" state: absent + loop: + - "postgres-{{ ansible_operator_meta.name }}-postgres-0" + - "postgres-{{ ansible_operator_meta.name }}-postgres-13-0" when: - postgres_keep_pvc_after_upgrade is defined - postgres_keep_pvc_after_upgrade | length diff --git a/roles/postgres/templates/postgres.yaml.j2 b/roles/postgres/templates/postgres.yaml.j2 index c6efbf91..fefc638a 100644 --- a/roles/postgres/templates/postgres.yaml.j2 +++ b/roles/postgres/templates/postgres.yaml.j2 @@ -3,7 +3,7 @@ apiVersion: v1 kind: StatefulSet metadata: - name: '{{ ansible_operator_meta.name }}-postgres-{{ postgres_version }}' + name: '{{ ansible_operator_meta.name }}-postgres-{{ supported_pg_version }}' namespace: 
'{{ ansible_operator_meta.namespace }}' annotations: # About kube-linter checks: https://docs.kubelinter.io/#/generated/checks ignore-check.kube-linter.io/unset-cpu-requirements: "Temporarily disabled" @@ -14,7 +14,7 @@ metadata: app.kubernetes.io/name: 'postgres' app.kubernetes.io/instance: 'postgres-{{ ansible_operator_meta.name }}' app.kubernetes.io/component: database - app.kubernetes.io/version: '{{ postgres_version }}' + app.kubernetes.io/version: '{{ supported_pg_version }}' app.kubernetes.io/part-of: '{{ deployment_type }}' app.kubernetes.io/managed-by: '{{ deployment_type }}-operator' spec: @@ -23,10 +23,10 @@ spec: app.kubernetes.io/name: 'postgres' app.kubernetes.io/instance: 'postgres-{{ ansible_operator_meta.name }}' app.kubernetes.io/component: database - app.kubernetes.io/version: '{{ postgres_version }}' + app.kubernetes.io/version: '{{ supported_pg_version }}' app.kubernetes.io/part-of: '{{ deployment_type }}' app.kubernetes.io/managed-by: '{{ deployment_type }}-operator' - serviceName: '{{ ansible_operator_meta.name }}-postgres-{{ postgres_version }}' + serviceName: '{{ ansible_operator_meta.name }}-postgres-{{ supported_pg_version }}' replicas: 1 updateStrategy: type: RollingUpdate @@ -36,7 +36,7 @@ spec: app.kubernetes.io/name: 'postgres' app.kubernetes.io/instance: 'postgres-{{ ansible_operator_meta.name }}' app.kubernetes.io/component: database - app.kubernetes.io/version: '{{ postgres_version }}' + app.kubernetes.io/version: '{{ supported_pg_version }}' app.kubernetes.io/part-of: '{{ deployment_type }}' app.kubernetes.io/managed-by: '{{ deployment_type }}-operator' spec: @@ -52,7 +52,7 @@ spec: args: {{ postgres_extra_args }} {% endif %} env: - # For postgres_image based on rhel8/postgresql-13 + # For postgres_image based on rhel9/postgresql - name: POSTGRESQL_DATABASE valueFrom: secretKeyRef: @@ -156,13 +156,13 @@ spec: apiVersion: v1 kind: Service metadata: - name: '{{ ansible_operator_meta.name }}-postgres-{{ postgres_version }}' + name: 
'{{ ansible_operator_meta.name }}-postgres-{{ supported_pg_version }}' namespace: '{{ ansible_operator_meta.namespace }}' labels: app.kubernetes.io/name: 'postgres' app.kubernetes.io/instance: 'postgres-{{ ansible_operator_meta.name }}' app.kubernetes.io/component: database - app.kubernetes.io/version: '{{ postgres_version }}' + app.kubernetes.io/version: '{{ supported_pg_version }}' app.kubernetes.io/part-of: '{{ deployment_type }}' app.kubernetes.io/managed-by: '{{ deployment_type }}-operator' spec: @@ -173,6 +173,6 @@ spec: app.kubernetes.io/name: 'postgres' app.kubernetes.io/instance: 'postgres-{{ ansible_operator_meta.name }}' app.kubernetes.io/component: database - app.kubernetes.io/version: '{{ postgres_version }}' + app.kubernetes.io/version: '{{ supported_pg_version }}' app.kubernetes.io/part-of: '{{ deployment_type }}' app.kubernetes.io/managed-by: '{{ deployment_type }}-operator' diff --git a/roles/postgres/templates/postgres_upgrade.secret.yaml.j2 b/roles/postgres/templates/postgres_upgrade.secret.yaml.j2 index 052569b5..d3c12b8d 100644 --- a/roles/postgres/templates/postgres_upgrade.secret.yaml.j2 +++ b/roles/postgres/templates/postgres_upgrade.secret.yaml.j2 @@ -10,6 +10,6 @@ stringData: username: '{{ postgres_user }}' database: '{{ postgres_database }}' port: '{{ postgres_port }}' - host: {{ ansible_operator_meta.name }}-postgres-{{ postgres_version }} + host: {{ ansible_operator_meta.name }}-postgres-{{ supported_pg_version }} sslmode: {{ postgres_sslmode | default('prefer') }} type: 'managed' diff --git a/roles/postgres/vars/main.yml b/roles/postgres/vars/main.yml new file mode 100644 index 00000000..c65faa4b --- /dev/null +++ b/roles/postgres/vars/main.yml @@ -0,0 +1,4 @@ +--- +supported_pg_version: 15 +_previous_upgraded_pg_version: 0 +old_postgres_pod: [] diff --git a/roles/redis/defaults/main.yml b/roles/redis/defaults/main.yml index afbb17c9..84576d0b 100644 --- a/roles/redis/defaults/main.yml +++ b/roles/redis/defaults/main.yml @@ -1,5 +1,5 
@@ --- -_redis_image: redis:7 +_redis_image: redis:latest redis_storage_size: 1Gi # Here we use _galaxy_ansible_com_galaxy to get un-modified cr diff --git a/roles/restore/tasks/postgres.yml b/roles/restore/tasks/postgres.yml index 64ab3a2b..294103ad 100644 --- a/roles/restore/tasks/postgres.yml +++ b/roles/restore/tasks/postgres.yml @@ -46,7 +46,7 @@ namespace: '{{ ansible_operator_meta.namespace }}' label_selectors: - "{{ postgres_label_selector }}" - - "app.kubernetes.io/version={{ postgres_version }}" + - "app.kubernetes.io/version={{ supported_pg_version }}" register: postgres_pod until: - "postgres_pod['resources'] | length" diff --git a/roles/restore/tasks/secrets.yml b/roles/restore/tasks/secrets.yml index 7d205896..da797634 100644 --- a/roles/restore/tasks/secrets.yml +++ b/roles/restore/tasks/secrets.yml @@ -33,7 +33,7 @@ - name: If deployment is managed, set the database_host in the pg config secret set_fact: - database_host: "{{ deployment_name }}-postgres-{{ postgres_version }}" + database_host: "{{ deployment_name }}-postgres-{{ supported_pg_version }}" db_secret_name: "{{ deployment_name }}-postgres-configuration" no_log: "{{ no_log }}" when: diff --git a/roles/restore/vars/main.yml b/roles/restore/vars/main.yml index c55bc41c..de4d7159 100644 --- a/roles/restore/vars/main.yml +++ b/roles/restore/vars/main.yml @@ -1,6 +1,7 @@ --- +supported_pg_version: 15 deployment_type: "galaxy" -_postgres_image: postgres:13 +_postgres_image: quay.io/sclorg/postgresql-15-c9s:latest custom_resource_key: '_galaxy_ansible_com_galaxyrestore' From 7e39aee5d5ac84eeceaf4d2e69c31603ec9a3b6f Mon Sep 17 00:00:00 2001 From: "Christian M. 
Adams" Date: Fri, 8 Mar 2024 16:13:07 -0500 Subject: [PATCH 02/27] Update scale down utility playbook to scale down the correct deployments --- roles/postgres/tasks/migrate_data.yml | 9 ++------ roles/postgres/tasks/scale_down.yml | 18 ---------------- .../postgres/tasks/scale_down_deployments.yml | 21 +++++++++++++++++++ roles/postgres/tasks/upgrade_postgres.yml | 2 +- roles/restore/tasks/postgres.yml | 8 ++----- roles/restore/tasks/scale_down.yml | 18 ---------------- .../restore/tasks/scale_down_deployments.yml | 21 +++++++++++++++++++ 7 files changed, 47 insertions(+), 50 deletions(-) delete mode 100644 roles/postgres/tasks/scale_down.yml create mode 100644 roles/postgres/tasks/scale_down_deployments.yml delete mode 100644 roles/restore/tasks/scale_down.yml create mode 100644 roles/restore/tasks/scale_down_deployments.yml diff --git a/roles/postgres/tasks/migrate_data.yml b/roles/postgres/tasks/migrate_data.yml index 7e8ea166..cea4fff1 100644 --- a/roles/postgres/tasks/migrate_data.yml +++ b/roles/postgres/tasks/migrate_data.yml @@ -43,13 +43,8 @@ status: "False" lastTransitionTime: "{{ lookup('pipe', 'date --iso-8601=seconds') }}" -- include: scale_down.yml deployment_name={{ item }} - with_items: - - "{{ ansible_operator_meta.name }}-api" - - "{{ ansible_operator_meta.name }}-content" - - "{{ ansible_operator_meta.name }}-resource-manager" - - "{{ ansible_operator_meta.name }}-worker" - - "{{ ansible_operator_meta.name }}-web" +- name: Scale down deployments for migration + include: scale_down_deployments.yml - k8s_status: api_version: "{{ api_version }}" diff --git a/roles/postgres/tasks/scale_down.yml b/roles/postgres/tasks/scale_down.yml deleted file mode 100644 index e15542e8..00000000 --- a/roles/postgres/tasks/scale_down.yml +++ /dev/null @@ -1,18 +0,0 @@ ---- - -- name: Check for presence of Deployment - k8s_info: - api_version: v1 - kind: Deployment - name: "{{ deployment_name }}" - namespace: "{{ ansible_operator_meta.namespace }}" - register: 
scale_deployment - -- name: Scale down Deployment for migration - k8s_scale: - api_version: v1 - kind: Deployment - name: "{{ deployment_name }}" - namespace: "{{ ansible_operator_meta.namespace }}" - replicas: 0 - when: scale_deployment['resources'] | length diff --git a/roles/postgres/tasks/scale_down_deployments.yml b/roles/postgres/tasks/scale_down_deployments.yml new file mode 100644 index 00000000..76fb88e8 --- /dev/null +++ b/roles/postgres/tasks/scale_down_deployments.yml @@ -0,0 +1,21 @@ +--- +- name: Check for presence of Deployment + k8s_info: + api_version: apps/v1 + kind: Deployment + namespace: "{{ ansible_operator_meta.namespace }}" + label_selectors: + - 'app.kubernetes.io/part-of={{ ansible_operator_meta.name }}' + - 'app.kubernetes.io/managed-by={{ deployment_type }}-operator' + register: _deployments + +- name: Scale down Deployment for migration + kubernetes.core.k8s_scale: + api_version: apps/v1 + kind: Deployment + name: "{{ item }}" + namespace: "{{ ansible_operator_meta.namespace }}" + replicas: 0 + wait: yes + loop: "{{ _deployments.resources | map(attribute='metadata.name') | list }}" + when: _deployments.resources | length diff --git a/roles/postgres/tasks/upgrade_postgres.yml b/roles/postgres/tasks/upgrade_postgres.yml index f10680cc..72d43bda 100644 --- a/roles/postgres/tasks/upgrade_postgres.yml +++ b/roles/postgres/tasks/upgrade_postgres.yml @@ -18,7 +18,7 @@ lastTransitionTime: "{{ lookup('pipe', 'date --iso-8601=seconds') }}" - name: Scale down Deployment for migration - include_tasks: scale_down_deployment.yml + include_tasks: scale_down_deployments.yml - name: Delete existing postgres configuration secret k8s: diff --git a/roles/restore/tasks/postgres.yml b/roles/restore/tasks/postgres.yml index 294103ad..14b2c9dc 100644 --- a/roles/restore/tasks/postgres.yml +++ b/roles/restore/tasks/postgres.yml @@ -82,12 +82,8 @@ when: pvc_status.resources | length == 0 when: storage_type | lower == 'file' -- include: scale_down.yml 
deploy_name={{ item }} - with_items: - - "{{ deployment_name}}-api" - - "{{ deployment_name }}-content" - - "{{ deployment_name }}-worker" - - "{{ deployment_name }}-web" +- name: Scale down deployments for migration + include: scale_down_deployments.yml - name: Set full resolvable host name for postgres pod set_fact: diff --git a/roles/restore/tasks/scale_down.yml b/roles/restore/tasks/scale_down.yml deleted file mode 100644 index cd0b7fd3..00000000 --- a/roles/restore/tasks/scale_down.yml +++ /dev/null @@ -1,18 +0,0 @@ ---- - -- name: Check for presence of Deployment - k8s_info: - api_version: v1 - kind: Deployment - name: "{{ deploy_name }}" - namespace: "{{ ansible_operator_meta.namespace }}" - register: scale_deployment - -- name: Scale down Deployment for migration - k8s_scale: - api_version: v1 - kind: Deployment - name: "{{ deploy_name }}" - namespace: "{{ ansible_operator_meta.namespace }}" - replicas: 0 - when: scale_deployment['resources'] | length diff --git a/roles/restore/tasks/scale_down_deployments.yml b/roles/restore/tasks/scale_down_deployments.yml new file mode 100644 index 00000000..b48f22f8 --- /dev/null +++ b/roles/restore/tasks/scale_down_deployments.yml @@ -0,0 +1,21 @@ +--- +- name: Check for presence of Deployment + k8s_info: + api_version: apps/v1 + kind: Deployment + namespace: "{{ ansible_operator_meta.namespace }}" + label_selectors: + - 'app.kubernetes.io/part-of={{ deploy_name }}' + - 'app.kubernetes.io/managed-by={{ deployment_type }}-operator' + register: _deployments + +- name: Scale down Deployment for migration + kubernetes.core.k8s_scale: + api_version: apps/v1 + kind: Deployment + name: "{{ item }}" + namespace: "{{ ansible_operator_meta.namespace }}" + replicas: 0 + wait: yes + loop: "{{ _deployments.resources | map(attribute='metadata.name') | list }}" + when: _deployments.resources | length From 7dc84a559f0b104bf6a6a6a4b901cab32408d6ee Mon Sep 17 00:00:00 2001 From: "Christian M. 
Adams" Date: Fri, 8 Mar 2024 16:14:20 -0500 Subject: [PATCH 03/27] Use new v1.34.1 ansible-operator base image and operator_sdk.util 0.5.0 - Numbers are not valid in camel-case status reason with 0.5.0 - The new image was needed because selectattr and rejectattr filtering exhibited different behavior in ansible-core 2.9, which did not work here. - The new ansible-operator image does not have the openshift python package, so it was necessary to upgrade to operator_sdk.util 0.5.0, which no longer requires the openshift python package. --- Dockerfile | 2 +- requirements.yml | 2 +- roles/postgres/tasks/upgrade_postgres.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index f4f3b25d..b6c7e043 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM quay.io/operator-framework/ansible-operator:v1.23.0 +FROM quay.io/operator-framework/ansible-operator:v1.34.1 ARG DEFAULT_GALAXY_VERSION ARG DEFAULT_GALAXY_UI_VERSION diff --git a/requirements.yml b/requirements.yml index 3215a21f..d2c7595a 100644 --- a/requirements.yml +++ b/requirements.yml @@ -5,4 +5,4 @@ collections: - name: kubernetes.core version: "==2.3.2" - name: operator_sdk.util - version: "0.4.0" + version: "0.5.0" diff --git a/roles/postgres/tasks/upgrade_postgres.yml b/roles/postgres/tasks/upgrade_postgres.yml index 72d43bda..72693fdd 100644 --- a/roles/postgres/tasks/upgrade_postgres.yml +++ b/roles/postgres/tasks/upgrade_postgres.yml @@ -37,7 +37,7 @@ conditions: - type: Database-Ready message: "Creating the new database configuration for postgres-{{ supported_pg_version }}" - reason: "CreatingNewSecretForPostgres{{ supported_pg_version }}" + reason: "CreatingNewSecretForPostgres" status: "False" lastTransitionTime: "{{ lookup('pipe', 'date --iso-8601=seconds') }}" From d617ec6ee49086e3b9842517003f7c50751c930d Mon Sep 17 00:00:00 2001 From: "Christian M. 
Adams" Date: Fri, 8 Mar 2024 16:18:06 -0500 Subject: [PATCH 04/27] Check PG_VERSION paths for both postgres and sclorg postgresql image - Since we cannot guarantee which postgres image was used, we must check PG_VERSION in the paths for each image --- roles/postgres/tasks/main.yml | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/roles/postgres/tasks/main.yml b/roles/postgres/tasks/main.yml index 1a2baeff..e0491831 100644 --- a/roles/postgres/tasks/main.yml +++ b/roles/postgres/tasks/main.yml @@ -104,15 +104,22 @@ - name: Set path to PG_VERSION file for given container image set_fact: path_to_pg_version: '{{ postgres_data_path }}/PG_VERSION' + path_to_pg_version_old: '/var/lib/postgresql/data/pgdata/PG_VERSION' - - name: Get old PostgreSQL version + - name: Get old PostgreSQL version (Checking old pg data path too) kubernetes.core.k8s_exec: namespace: "{{ ansible_operator_meta.namespace }}" pod: "{{ old_postgres_pod['metadata']['name'] }}" command: | - bash -c """ - cat {{ path_to_pg_version }} - """ + bash -c ' + if [ -f {{ path_to_pg_version }} ]; then + cat {{ path_to_pg_version }} + elif [ -f {{ path_to_pg_version_old }} ]; then + cat {{ path_to_pg_version_old }} + else + echo "Neither file exists" + fi + ' register: _old_pg_version - debug: From 9875002962bd55ae002d7f7a16ecd144d4cbb66c Mon Sep 17 00:00:00 2001 From: "Christian M. 
Adams" Date: Fri, 8 Mar 2024 16:43:47 -0500 Subject: [PATCH 05/27] Delete the old Postgres PVC by default after Postgres upgrade --- roles/postgres/defaults/main.yml | 2 +- roles/postgres/tasks/upgrade_postgres.yml | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/roles/postgres/defaults/main.yml b/roles/postgres/defaults/main.yml index 93f7dd2e..0c0eeed3 100644 --- a/roles/postgres/defaults/main.yml +++ b/roles/postgres/defaults/main.yml @@ -21,7 +21,7 @@ postgres_host_auth_method: 'scram-sha-256' postgres_selector: '' # Specify whether or not to keep the old PVC after PostgreSQL upgrades -postgres_keep_pvc_after_upgrade: true +postgres_keep_pvc_after_upgrade: false # Add node tolerations for the Postgres pods. # Specify as literal block. E.g.: diff --git a/roles/postgres/tasks/upgrade_postgres.yml b/roles/postgres/tasks/upgrade_postgres.yml index 72693fdd..589052df 100644 --- a/roles/postgres/tasks/upgrade_postgres.yml +++ b/roles/postgres/tasks/upgrade_postgres.yml @@ -310,8 +310,7 @@ - "postgres-{{ ansible_operator_meta.name }}-postgres-0" - "postgres-{{ ansible_operator_meta.name }}-postgres-13-0" when: - - postgres_keep_pvc_after_upgrade is defined - - postgres_keep_pvc_after_upgrade | length + - not postgres_keep_pvc_after_upgrade - k8s_status: api_version: "{{ api_version }}" From 50c428fa83bbe32f046d4fb803ba08fe403f810b Mon Sep 17 00:00:00 2001 From: "Christian M. 
Adams" Date: Fri, 8 Mar 2024 17:19:38 -0500 Subject: [PATCH 06/27] Scale up web and content replicas after upgrade and restore --- roles/postgres/tasks/upgrade_postgres.yml | 19 +++++++++++++++++++ roles/restore/tasks/postgres.yml | 17 +++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/roles/postgres/tasks/upgrade_postgres.yml b/roles/postgres/tasks/upgrade_postgres.yml index 589052df..a2d34e55 100644 --- a/roles/postgres/tasks/upgrade_postgres.yml +++ b/roles/postgres/tasks/upgrade_postgres.yml @@ -323,3 +323,22 @@ reason: DatabaseTasksFinished status: "True" lastTransitionTime: "{{ lookup('pipe', 'date --iso-8601=seconds') }}" + +- name: Scale up web and content Deployments using replicas from the Galaxy CR + kubernetes.core.k8s_scale: + api_version: apps/v1 + kind: Deployment + name: "{{ item.name }}" + namespace: "{{ ansible_operator_meta.namespace }}" + replicas: "{{ item.replicas }}" + wait: yes + loop: + - name: "{{ ansible_operator_meta.name }}-web" + replicas: "{{ cr_spec['web']['replicas'] }}" + - name: "{{ ansible_operator_meta.name }}-content" + replicas: "{{ cr_spec['content']['replicas'] }}" + when: + - combined_web.replicas is defined + - combined_web.replicas | int > 0 + - combined_content.replicas is defined + - combined_content.replicas | int > 0 diff --git a/roles/restore/tasks/postgres.yml b/roles/restore/tasks/postgres.yml index 14b2c9dc..df8c549c 100644 --- a/roles/restore/tasks/postgres.yml +++ b/roles/restore/tasks/postgres.yml @@ -159,3 +159,20 @@ " register: data_migration no_log: "{{ no_log }}" + +- name: Scale up web and content Deployments using replicas from the Galaxy CR + kubernetes.core.k8s_scale: + api_version: apps/v1 + kind: Deployment + name: "{{ item.name }}" + namespace: "{{ ansible_operator_meta.namespace }}" + replicas: "{{ item.replicas }}" + wait: yes + loop: + - name: "{{ ansible_operator_meta.name }}-web" + replicas: "{{ cr_spec['web']['replicas'] }}" + - name: "{{ ansible_operator_meta.name }}-content" + 
replicas: "{{ cr_spec['content']['replicas'] }}" + when: + - cr_spec['web']['replicas'] | int > 0 + - cr_spec['content']['replicas'] | int > 0 From 32223b76c8efdc6a1140bdf24d55b5e3df7c340b Mon Sep 17 00:00:00 2001 From: "Christian M. Adams" Date: Fri, 8 Mar 2024 20:11:59 -0500 Subject: [PATCH 07/27] Fix indentation error with task that removes the pg pvc - Fix typo when referencing replicas - Scale up pods after upgrade - Removed unneeded default for upgraded_postgres_version variable --- roles/galaxy-status/defaults/main.yml | 1 - roles/galaxy-status/tasks/main.yml | 1 - roles/postgres/tasks/upgrade_postgres.yml | 27 ++++++++++++----------- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/roles/galaxy-status/defaults/main.yml b/roles/galaxy-status/defaults/main.yml index a1ab1b82..e21123ed 100644 --- a/roles/galaxy-status/defaults/main.yml +++ b/roles/galaxy-status/defaults/main.yml @@ -1,6 +1,5 @@ --- postgres_migrated_from_secret: '' -upgraded_postgres_version: '' ingress_type: none diff --git a/roles/galaxy-status/tasks/main.yml b/roles/galaxy-status/tasks/main.yml index 6c0e8ed0..c66d9822 100644 --- a/roles/galaxy-status/tasks/main.yml +++ b/roles/galaxy-status/tasks/main.yml @@ -28,7 +28,6 @@ upgradedPostgresVersion: "{{ upgraded_postgres_version | string }}" when: - upgraded_postgres_version is defined - - upgraded_postgres_version | length - name: Get the resource pod information. 
k8s_info: diff --git a/roles/postgres/tasks/upgrade_postgres.yml b/roles/postgres/tasks/upgrade_postgres.yml index a2d34e55..ef709ac0 100644 --- a/roles/postgres/tasks/upgrade_postgres.yml +++ b/roles/postgres/tasks/upgrade_postgres.yml @@ -306,9 +306,9 @@ namespace: "{{ ansible_operator_meta.namespace }}" name: "{{ item }}" state: absent - loop: - - "postgres-{{ ansible_operator_meta.name }}-postgres-0" - - "postgres-{{ ansible_operator_meta.name }}-postgres-13-0" + loop: + - "postgres-{{ ansible_operator_meta.name }}-postgres-0" + - "postgres-{{ ansible_operator_meta.name }}-postgres-13-0" when: - not postgres_keep_pvc_after_upgrade @@ -324,21 +324,22 @@ status: "True" lastTransitionTime: "{{ lookup('pipe', 'date --iso-8601=seconds') }}" -- name: Scale up web and content Deployments using replicas from the Galaxy CR +# Default to 1 replica if not specified +- name: Scale up web and content Deployments using replicas from the custom resource kubernetes.core.k8s_scale: - api_version: apps/v1 + api_version: v1 kind: Deployment name: "{{ item.name }}" namespace: "{{ ansible_operator_meta.namespace }}" replicas: "{{ item.replicas }}" - wait: yes loop: + - name: "{{ ansible_operator_meta.name }}-api" + replicas: "{{ combined_api.replicas | default(1) }}" - name: "{{ ansible_operator_meta.name }}-web" - replicas: "{{ cr_spec['web']['replicas'] }}" + replicas: "{{ combined_web.replicas | default(1)}}" - name: "{{ ansible_operator_meta.name }}-content" - replicas: "{{ cr_spec['content']['replicas'] }}" - when: - - combined_web.replicas is defined - - combined_web.replicas | int > 0 - - combined_content.replicas is defined - - combined_content.replicas | int > 0 + replicas: "{{ combined_content.replicas | default(1) }}" + - name: "{{ ansible_operator_meta.name }}-worker" + replicas: "{{ combined_worker.replicas | default(1)}}" + - name: "{{ ansible_operator_meta.name }}-redis" + replicas: "{{ combined_redis.replicas | default(1)}}" From 
d8320d7df72a4d4404f4b6f19155c78713b7cc12 Mon Sep 17 00:00:00 2001 From: "Christian M. Adams" Date: Mon, 11 Mar 2024 11:03:01 -0400 Subject: [PATCH 08/27] Set env vars from secrets so when pods cycle values are updated - without this, the db host env var is not updated on the deployment until the deployment yaml is patched in a future reconciliation loop. - remove logic to scale up the content and web pods after upgrading and rely on a second reconciliation loop for now. --- playbooks/galaxy.yaml | 2 +- roles/common/tasks/postgres_configuration.yml | 5 ++++- .../templates/galaxy-api.deployment.yaml.j2 | 20 +++++++++++++++---- .../galaxy-content.deployment.yaml.j2 | 10 ++++++++-- .../galaxy-worker.deployment.yaml.j2 | 10 ++++++++-- roles/postgres/tasks/upgrade_postgres.yml | 19 ------------------ roles/postgres/templates/postgres.yaml.j2 | 12 +++++------ 7 files changed, 43 insertions(+), 35 deletions(-) diff --git a/playbooks/galaxy.yaml b/playbooks/galaxy.yaml index 3d1e867e..50bb1cb4 100644 --- a/playbooks/galaxy.yaml +++ b/playbooks/galaxy.yaml @@ -107,4 +107,4 @@ - galaxy-worker - galaxy-api - galaxy-route - - galaxy-status \ No newline at end of file + - galaxy-status diff --git a/roles/common/tasks/postgres_configuration.yml b/roles/common/tasks/postgres_configuration.yml index d18b29ad..009200e5 100644 --- a/roles/common/tasks/postgres_configuration.yml +++ b/roles/common/tasks/postgres_configuration.yml @@ -82,12 +82,15 @@ - postgres_migrant_configuration_secret is defined - recorded_db_migration_secret == postgres_migrant_configuration_secret - - name: Set PostgreSQL Configuration set_fact: pg_config: '{{ _generated_pg_config_resources["resources"] | default([]) | length | ternary(_generated_pg_config_resources, _pg_config) }}' no_log: "{{ no_log }}" +- name: Set actual postgres configuration secret used + set_fact: + _postgres_configuration_secret: "{{ pg_config['resources'][0]['metadata']['name'] }}" + - name: Set user provided postgres image set_fact: 
_custom_postgres_image: "{{ postgres_image }}" diff --git a/roles/galaxy-api/templates/galaxy-api.deployment.yaml.j2 b/roles/galaxy-api/templates/galaxy-api.deployment.yaml.j2 index 0f383d36..215e0139 100644 --- a/roles/galaxy-api/templates/galaxy-api.deployment.yaml.j2 +++ b/roles/galaxy-api/templates/galaxy-api.deployment.yaml.j2 @@ -139,9 +139,15 @@ spec: - name: ENTRYPOINT value: "{{ _entrypoint_dir }}/pulpcore-api" - name: POSTGRES_SERVICE_HOST - value: "{{ postgres_host }}" + valueFrom: + secretKeyRef: + name: {{ _postgres_configuration_secret }} + key: host - name: POSTGRES_SERVICE_PORT - value: "{{ postgres_port }}" + valueFrom: + secretKeyRef: + name: {{ _postgres_configuration_secret }} + key: port {% if pulp_combined_settings.cache_enabled %} - name: REDIS_SERVICE_HOST value: "{{ ansible_operator_meta.name }}-redis-svc" @@ -257,9 +263,15 @@ spec: pulpcore-manager migrate env: - name: POSTGRES_SERVICE_HOST - value: "{{ postgres_host }}" + valueFrom: + secretKeyRef: + name: {{ _postgres_configuration_secret }} + key: host - name: POSTGRES_SERVICE_PORT - value: "{{ postgres_port }}" + valueFrom: + secretKeyRef: + name: {{ _postgres_configuration_secret }} + key: port - name: HOME value: "/var/lib/pulp" volumeMounts: diff --git a/roles/galaxy-content/templates/galaxy-content.deployment.yaml.j2 b/roles/galaxy-content/templates/galaxy-content.deployment.yaml.j2 index 18642a36..2dfa9d22 100644 --- a/roles/galaxy-content/templates/galaxy-content.deployment.yaml.j2 +++ b/roles/galaxy-content/templates/galaxy-content.deployment.yaml.j2 @@ -161,9 +161,15 @@ spec: - name: ENTRYPOINT value: "{{ _entrypoint_dir }}/pulpcore-content" - name: POSTGRES_SERVICE_HOST - value: "{{ postgres_host }}" + valueFrom: + secretKeyRef: + name: {{ _postgres_configuration_secret }} + key: host - name: POSTGRES_SERVICE_PORT - value: "{{ postgres_port }}" + valueFrom: + secretKeyRef: + name: {{ _postgres_configuration_secret }} + key: port {% if pulp_combined_settings.cache_enabled %} - 
name: REDIS_SERVICE_HOST value: "{{ ansible_operator_meta.name }}-redis-svc" diff --git a/roles/galaxy-worker/templates/galaxy-worker.deployment.yaml.j2 b/roles/galaxy-worker/templates/galaxy-worker.deployment.yaml.j2 index 4e1b7e4c..e66d151f 100644 --- a/roles/galaxy-worker/templates/galaxy-worker.deployment.yaml.j2 +++ b/roles/galaxy-worker/templates/galaxy-worker.deployment.yaml.j2 @@ -121,9 +121,15 @@ spec: - start-worker env: - name: POSTGRES_SERVICE_HOST - value: "{{ postgres_host }}" + valueFrom: + secretKeyRef: + name: {{ _postgres_configuration_secret }} + key: host - name: POSTGRES_SERVICE_PORT - value: "{{ postgres_port }}" + valueFrom: + secretKeyRef: + name: {{ _postgres_configuration_secret }} + key: port {% if pulp_combined_settings.cache_enabled %} - name: REDIS_SERVICE_HOST value: "{{ ansible_operator_meta.name }}-redis-svc" diff --git a/roles/postgres/tasks/upgrade_postgres.yml b/roles/postgres/tasks/upgrade_postgres.yml index ef709ac0..ef2a0fcf 100644 --- a/roles/postgres/tasks/upgrade_postgres.yml +++ b/roles/postgres/tasks/upgrade_postgres.yml @@ -324,22 +324,3 @@ status: "True" lastTransitionTime: "{{ lookup('pipe', 'date --iso-8601=seconds') }}" -# Default to 1 replica if not specified -- name: Scale up web and content Deployments using replicas from the custom resource - kubernetes.core.k8s_scale: - api_version: v1 - kind: Deployment - name: "{{ item.name }}" - namespace: "{{ ansible_operator_meta.namespace }}" - replicas: "{{ item.replicas }}" - loop: - - name: "{{ ansible_operator_meta.name }}-api" - replicas: "{{ combined_api.replicas | default(1) }}" - - name: "{{ ansible_operator_meta.name }}-web" - replicas: "{{ combined_web.replicas | default(1)}}" - - name: "{{ ansible_operator_meta.name }}-content" - replicas: "{{ combined_content.replicas | default(1) }}" - - name: "{{ ansible_operator_meta.name }}-worker" - replicas: "{{ combined_worker.replicas | default(1)}}" - - name: "{{ ansible_operator_meta.name }}-redis" - replicas: "{{ 
combined_redis.replicas | default(1)}}" diff --git a/roles/postgres/templates/postgres.yaml.j2 b/roles/postgres/templates/postgres.yaml.j2 index fefc638a..f52f4fd2 100644 --- a/roles/postgres/templates/postgres.yaml.j2 +++ b/roles/postgres/templates/postgres.yaml.j2 @@ -56,34 +56,34 @@ spec: - name: POSTGRESQL_DATABASE valueFrom: secretKeyRef: - name: '{{ postgres_configuration_secret }}' + name: '{{ _postgres_configuration_secret }}' key: database - name: POSTGRESQL_USER valueFrom: secretKeyRef: - name: '{{ postgres_configuration_secret }}' + name: '{{ _postgres_configuration_secret }}' key: username - name: POSTGRESQL_PASSWORD valueFrom: secretKeyRef: - name: '{{ postgres_configuration_secret }}' + name: '{{ _postgres_configuration_secret }}' key: password # For postgres_image based on postgres - name: POSTGRES_DB valueFrom: secretKeyRef: - name: '{{ postgres_configuration_secret }}' + name: '{{ _postgres_configuration_secret }}' key: database - name: POSTGRES_USER valueFrom: secretKeyRef: - name: '{{ postgres_configuration_secret }}' + name: '{{ _postgres_configuration_secret }}' key: username - name: POSTGRES_PASSWORD valueFrom: secretKeyRef: - name: '{{ postgres_configuration_secret }}' + name: '{{ _postgres_configuration_secret }}' key: password - name: PGDATA value: '{{ postgres_data_path }}' From 13d9e62db05aec7494a20b8d927d0a800ad7b778 Mon Sep 17 00:00:00 2001 From: "Christian M. 
Adams" Date: Mon, 11 Mar 2024 15:08:26 -0400 Subject: [PATCH 09/27] Fix unsafe conditional and do not explicitly scale up deployment after restoring --- roles/restore/tasks/deploy_galaxy.yml | 3 ++- roles/restore/tasks/postgres.yml | 17 ----------------- roles/restore/tasks/scale_down_deployments.yml | 2 +- 3 files changed, 3 insertions(+), 19 deletions(-) diff --git a/roles/restore/tasks/deploy_galaxy.yml b/roles/restore/tasks/deploy_galaxy.yml index 515690da..d42e574f 100644 --- a/roles/restore/tasks/deploy_galaxy.yml +++ b/roles/restore/tasks/deploy_galaxy.yml @@ -18,6 +18,7 @@ cr_spec_strip: "{ " admin_str: "admin_password_secret: {{ admin_password_name }}" storage_str: "object_storage_{{ storage_type | lower }}_secret: {{ storage_secret }}" + storage_key: "object_storage_{{ storage_type | lower }}_secret" container_token_str: "container_token_secret: {{ container_token_secret }}" db_fields_str: "db_fields_encryption_secret: {{ db_fields_encryption_secret }}" signing_str: "signing_secret: {{ signing_secret }}" @@ -39,7 +40,7 @@ set_fact: cr_spec_strip: "{{ cr_spec_strip + ', ' + storage_str }}" when: - - not 'object_storage_{{ storage_type | lower }}_secret' in cr_object.stdout + - not storage_key in cr_object.stdout - storage_type | lower != 'file' - name: Set custom resource spec container token from backup diff --git a/roles/restore/tasks/postgres.yml b/roles/restore/tasks/postgres.yml index df8c549c..14b2c9dc 100644 --- a/roles/restore/tasks/postgres.yml +++ b/roles/restore/tasks/postgres.yml @@ -159,20 +159,3 @@ " register: data_migration no_log: "{{ no_log }}" - -- name: Scale up web and content Deployments using replicas from the Galaxy CR - kubernetes.core.k8s_scale: - api_version: apps/v1 - kind: Deployment - name: "{{ item.name }}" - namespace: "{{ ansible_operator_meta.namespace }}" - replicas: "{{ item.replicas }}" - wait: yes - loop: - - name: "{{ ansible_operator_meta.name }}-web" - replicas: "{{ cr_spec['web']['replicas'] }}" - - name: "{{ 
ansible_operator_meta.name }}-content" - replicas: "{{ cr_spec['content']['replicas'] }}" - when: - - cr_spec['web']['replicas'] | int > 0 - - cr_spec['content']['replicas'] | int > 0 diff --git a/roles/restore/tasks/scale_down_deployments.yml b/roles/restore/tasks/scale_down_deployments.yml index b48f22f8..455ef6de 100644 --- a/roles/restore/tasks/scale_down_deployments.yml +++ b/roles/restore/tasks/scale_down_deployments.yml @@ -5,7 +5,7 @@ kind: Deployment namespace: "{{ ansible_operator_meta.namespace }}" label_selectors: - - 'app.kubernetes.io/part-of={{ deploy_name }}' + - 'app.kubernetes.io/part-of={{ deployment_name }}' - 'app.kubernetes.io/managed-by={{ deployment_type }}-operator' register: _deployments From ee6450d7c0dacd09266bf936c0c80dc257190447 Mon Sep 17 00:00:00 2001 From: "Christian M. Adams" Date: Mon, 11 Mar 2024 16:10:04 -0400 Subject: [PATCH 10/27] Specify Redis 7 image explicitly so that when we bump the version it will be intentional --- config/manager/manager.yaml | 2 +- .../bases/galaxy-operator.clusterserviceversion.yaml | 2 +- roles/postgres/defaults/main.yml | 2 +- roles/postgres/tasks/upgrade_postgres.yml | 12 ------------ roles/redis/README.md | 2 +- roles/redis/defaults/main.yml | 2 +- 6 files changed, 5 insertions(+), 17 deletions(-) diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 283ec7ed..38f9c5f7 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -59,7 +59,7 @@ spec: - name: RELATED_IMAGE_GALAXY_WEB value: quay.io/ansible/galaxy-ui:latest - name: RELATED_IMAGE_GALAXY_REDIS - value: redis:latest + value: redis:7 - name: RELATED_IMAGE_GALAXY_POSTGRES value: quay.io/sclorg/postgresql-15-c9s:latest - name: RELATED_IMAGE_GALAXY_INIT_GPG_CONTAINER diff --git a/config/manifests/bases/galaxy-operator.clusterserviceversion.yaml b/config/manifests/bases/galaxy-operator.clusterserviceversion.yaml index 0d4ed750..c4bf5c84 100644 --- 
a/config/manifests/bases/galaxy-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/galaxy-operator.clusterserviceversion.yaml @@ -963,7 +963,7 @@ spec: - name: RELATED_IMAGE_GALAXY_WEB value: quay.io/ansible/galaxy-ui:latest - name: RELATED_IMAGE_GALAXY_REDIS - value: redis:latest + value: redis:7 - name: RELATED_IMAGE_GALAXY_POSTGRES value: quay.io/sclorg/postgresql-15-c9s:latest - name: RELATED_IMAGE_GALAXY_INIT_GPG_CONTAINER diff --git a/roles/postgres/defaults/main.yml b/roles/postgres/defaults/main.yml index 0c0eeed3..e392e353 100644 --- a/roles/postgres/defaults/main.yml +++ b/roles/postgres/defaults/main.yml @@ -1,5 +1,5 @@ --- -_postgres_image: postgres +_postgres_image: quay.io/sclorg/postgresql-15-c9s:latest postgres_storage_requirements: requests: diff --git a/roles/postgres/tasks/upgrade_postgres.yml b/roles/postgres/tasks/upgrade_postgres.yml index ef2a0fcf..740b3aa9 100644 --- a/roles/postgres/tasks/upgrade_postgres.yml +++ b/roles/postgres/tasks/upgrade_postgres.yml @@ -122,18 +122,6 @@ set_fact: postgres_pod_name: "{{ postgres_pod['resources'][0]['metadata']['name'] }}" -- name: Get the name of the service for the old postgres pod - k8s_info: - kind: Service - namespace: "{{ ansible_operator_meta.namespace }}" - label_selectors: - - "app.kubernetes.io/component=database" - - "app.kubernetes.io/instance=postgres-{{ ansible_operator_meta.name }}" - - "app.kubernetes.io/version={{ supported_pg_version }}" - - "app.kubernetes.io/managed-by={{ deployment_type }}-operator" - register: old_postgres_svc - -# TODO: The galaxy-operator labels are different. potential issue here. 
- name: Get the name of the service for the old postgres pod k8s_info: kind: Service diff --git a/roles/redis/README.md b/roles/redis/README.md index be6e7a84..c9020cb5 100644 --- a/roles/redis/README.md +++ b/roles/redis/README.md @@ -10,7 +10,7 @@ A role to setup Galaxyredis, yielding the following objects: Role Variables -------------- -* `redis_image`: The redis image name. Default: redis:latest +* `redis_image`: The redis image name. Default: redis:7 Requirements ------------ diff --git a/roles/redis/defaults/main.yml b/roles/redis/defaults/main.yml index 84576d0b..afbb17c9 100644 --- a/roles/redis/defaults/main.yml +++ b/roles/redis/defaults/main.yml @@ -1,5 +1,5 @@ --- -_redis_image: redis:latest +_redis_image: redis:7 redis_storage_size: 1Gi # Here we use _galaxy_ansible_com_galaxy to get un-modified cr From a14842dd852adbe73e60c57039c44ccffcca77a2 Mon Sep 17 00:00:00 2001 From: "Christian M. Adams" Date: Mon, 11 Mar 2024 19:00:10 -0400 Subject: [PATCH 11/27] Try setting deployment_name to satisfy CI --- config/samples/galaxy_v1beta1_galaxyrestore_cr.ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/config/samples/galaxy_v1beta1_galaxyrestore_cr.ci.yaml b/config/samples/galaxy_v1beta1_galaxyrestore_cr.ci.yaml index b44509cf..8665a1e0 100644 --- a/config/samples/galaxy_v1beta1_galaxyrestore_cr.ci.yaml +++ b/config/samples/galaxy_v1beta1_galaxyrestore_cr.ci.yaml @@ -4,5 +4,6 @@ kind: GalaxyRestore metadata: name: ci-galaxyrestore spec: + deployment_name: example-galaxy backup_name: ci-galaxybackup no_log: false From ee64c4c398741f358da7456a843e6bc9bfe7d92f Mon Sep 17 00:00:00 2001 From: Dimitri Savineau Date: Mon, 25 Mar 2024 17:33:01 -0400 Subject: [PATCH 12/27] postgresql: Cast sorted_old_postgres_pods as list With ansible 2.9.27 (operator-sdk v1.27.0) then the reverse filter returns an iterator so we need to cast it to list. 
The behavior doesn't exist when using a more recent operator-sdk version like v1.34.0 (ansible-core 2.15.8) but using the list filter on that version works too (even if not needed) "sorted_old_postgres_pods": "" Signed-off-by: Dimitri Savineau --- roles/postgres/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/postgres/tasks/main.yml b/roles/postgres/tasks/main.yml index e0491831..cd86b481 100644 --- a/roles/postgres/tasks/main.yml +++ b/roles/postgres/tasks/main.yml @@ -56,7 +56,7 @@ set_fact: sorted_old_postgres_pods: "{{ filtered_old_postgres_pods | sort(attribute='metadata.name') | - reverse }}" + reverse | list }}" when: filtered_old_postgres_pods | length - name: Set info for previous postgres pod From 07495297eeaa518fd00fa839329da2a894ccba85 Mon Sep 17 00:00:00 2001 From: Dimitri Savineau Date: Mon, 25 Mar 2024 17:35:06 -0400 Subject: [PATCH 13/27] postgresql: Grant postgres role to galaxy During the postgresql upgrade, we need to grant temporary the postgres role to the galaxy postgresql user and remove it after the pg_restore is over. 
pg_restore: error: could not execute query: ERROR: must be member of role "postgres" Command was: ALTER SCHEMA public OWNER TO postgres; Signed-off-by: Dimitri Savineau --- roles/postgres/tasks/upgrade_postgres.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/roles/postgres/tasks/upgrade_postgres.yml b/roles/postgres/tasks/upgrade_postgres.yml index 740b3aa9..805d7d9b 100644 --- a/roles/postgres/tasks/upgrade_postgres.yml +++ b/roles/postgres/tasks/upgrade_postgres.yml @@ -215,7 +215,9 @@ trap 'end_keepalive \"$keepalive_file\" \"$keepalive_pid\"' EXIT SIGINT SIGTERM echo keepalive_pid: $keepalive_pid set -e -o pipefail + psql -c 'GRANT postgres TO {{ postgres_user }}' PGPASSWORD=\"$POSTGRES_PASSWORD\" {{ pgdump }} | PGPASSWORD=\"$POSTGRES_PASSWORD\" {{ pg_restore }} + psql -c 'REVOKE postgres FROM {{ postgres_user }}' set +e +o pipefail echo 'Successful' " From c0028ac2a481503fadd488ed1983626ccebf8ec1 Mon Sep 17 00:00:00 2001 From: "Christian M. Adams" Date: Thu, 28 Mar 2024 14:47:11 -0400 Subject: [PATCH 14/27] Change default postgres_data_path to that of the new sclorg image --- roles/postgres/defaults/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/postgres/defaults/main.yml b/roles/postgres/defaults/main.yml index e392e353..195573db 100644 --- a/roles/postgres/defaults/main.yml +++ b/roles/postgres/defaults/main.yml @@ -6,7 +6,7 @@ postgres_storage_requirements: storage: 8Gi postgres_resource_requirements: {} -postgres_data_path: '/var/lib/postgresql/data/pgdata' +postgres_data_path: '/var/lib/pgsql/data/userdata' postgres_initdb_args: '--auth-host=scram-sha-256' postgres_host_auth_method: 'scram-sha-256' From 84323d96dccc9d300b38276f779c79c66d0b779f Mon Sep 17 00:00:00 2001 From: "Christian M. 
Adams" Date: Wed, 3 Apr 2024 15:46:30 -0400 Subject: [PATCH 15/27] Remove the ability to customize the postgres_data_dir * in the sclorg Postgresql 15 image, the PGDATA directory is hardcoded * if users were to modify this directory, they would only change the directory the pvc is mounted to, not the directory PostgreSQL uses. This would result in loss of data. * switch from /var/lib/pgsql/data/pgdata to /var/lib/pgsql/data/userdata Signed-off-by: Christian M. Adams --- config/crd/bases/galaxy_v1beta1_galaxy_crd.yaml | 3 --- .../bases/galaxy-operator.clusterserviceversion.yaml | 5 ----- roles/postgres/defaults/main.yml | 2 -- roles/postgres/tasks/main.yml | 2 +- roles/postgres/templates/postgres.yaml.j2 | 6 +++--- roles/postgres/vars/main.yml | 1 + 6 files changed, 5 insertions(+), 14 deletions(-) diff --git a/config/crd/bases/galaxy_v1beta1_galaxy_crd.yaml b/config/crd/bases/galaxy_v1beta1_galaxy_crd.yaml index 5b54b2c9..b900d838 100644 --- a/config/crd/bases/galaxy_v1beta1_galaxy_crd.yaml +++ b/config/crd/bases/galaxy_v1beta1_galaxy_crd.yaml @@ -135,9 +135,6 @@ spec: postgres_storage_class: description: Storage class to use for the PostgreSQL PVC type: string - postgres_data_path: - description: Path where the PostgreSQL data are located - type: string postgres_extra_args: type: array items: diff --git a/config/manifests/bases/galaxy-operator.clusterserviceversion.yaml b/config/manifests/bases/galaxy-operator.clusterserviceversion.yaml index c4bf5c84..c7a602d2 100644 --- a/config/manifests/bases/galaxy-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/galaxy-operator.clusterserviceversion.yaml @@ -100,11 +100,6 @@ spec: path: postgres_resource_requirements x-descriptors: - urn:alm:descriptor:com.tectonic.ui:advanced - - displayName: Database data path - path: postgres_data_path - x-descriptors: - - urn:alm:descriptor:com.tectonic.ui:advanced - - urn:alm:descriptor:com.tectonic.ui:hidden - displayName: Postgres Extra Arguments path: 
postgres_extra_args x-descriptors: diff --git a/roles/postgres/defaults/main.yml b/roles/postgres/defaults/main.yml index 195573db..f47ef628 100644 --- a/roles/postgres/defaults/main.yml +++ b/roles/postgres/defaults/main.yml @@ -6,8 +6,6 @@ postgres_storage_requirements: storage: 8Gi postgres_resource_requirements: {} -postgres_data_path: '/var/lib/pgsql/data/userdata' - postgres_initdb_args: '--auth-host=scram-sha-256' postgres_host_auth_method: 'scram-sha-256' diff --git a/roles/postgres/tasks/main.yml b/roles/postgres/tasks/main.yml index cd86b481..32d1effa 100644 --- a/roles/postgres/tasks/main.yml +++ b/roles/postgres/tasks/main.yml @@ -103,7 +103,7 @@ block: - name: Set path to PG_VERSION file for given container image set_fact: - path_to_pg_version: '{{ postgres_data_path }}/PG_VERSION' + path_to_pg_version: '{{ _postgres_data_path }}/PG_VERSION' path_to_pg_version_old: '/var/lib/postgresql/data/pgdata/PG_VERSION' - name: Get old PostgreSQL version (Checking old pg data path too) diff --git a/roles/postgres/templates/postgres.yaml.j2 b/roles/postgres/templates/postgres.yaml.j2 index f52f4fd2..26a6b13b 100644 --- a/roles/postgres/templates/postgres.yaml.j2 +++ b/roles/postgres/templates/postgres.yaml.j2 @@ -86,7 +86,7 @@ spec: name: '{{ _postgres_configuration_secret }}' key: password - name: PGDATA - value: '{{ postgres_data_path }}' + value: '{{ _postgres_data_path }}' - name: POSTGRES_INITDB_ARGS value: '{{ postgres_initdb_args }}' - name: POSTGRES_HOST_AUTH_METHOD @@ -129,8 +129,8 @@ spec: successThreshold: 1 volumeMounts: - name: postgres - mountPath: '{{ postgres_data_path | dirname }}' - subPath: '{{ postgres_data_path | dirname | basename }}' + mountPath: '{{ _postgres_data_path | dirname }}' + subPath: '{{ _postgres_data_path | dirname | basename }}' resources: {{ postgres_resource_requirements }} {% if postgres_selector %} nodeSelector: diff --git a/roles/postgres/vars/main.yml b/roles/postgres/vars/main.yml index c65faa4b..4e305a0d 100644 --- 
a/roles/postgres/vars/main.yml +++ b/roles/postgres/vars/main.yml @@ -2,3 +2,4 @@ supported_pg_version: 15 _previous_upgraded_pg_version: 0 old_postgres_pod: [] +_postgres_data_path: '/var/lib/pgsql/data/userdata' From b81abd9cdceb89221f41eb13816decbcc893c201 Mon Sep 17 00:00:00 2001 From: "Christian M. Adams" Date: Wed, 3 Apr 2024 16:01:16 -0400 Subject: [PATCH 16/27] Add initContainer to initialize Postgres data volume permissions if needed This is for k8s deployments only. Add postgres init container if postgres_data_volume_init is true This is aimed to solve the issue where users may need to chmod or chown the postgres data volume for user 26, which is the user that is running postgres in the sclorg image. For example, one can now set the following on the AWX spec: spec: postgres_init_container_commands: | chown 26:0 /var/lib/pgsql/data chmod 700 /var/lib/pgsql/data --- .../crd/bases/galaxy_v1beta1_galaxy_crd.yaml | 6 ++++++ .../galaxy-operator.clusterserviceversion.yaml | 9 +++++++++ roles/postgres/defaults/main.yml | 5 +++++ roles/postgres/templates/postgres.yaml.j2 | 18 ++++++++++++++++++ 4 files changed, 38 insertions(+) diff --git a/config/crd/bases/galaxy_v1beta1_galaxy_crd.yaml b/config/crd/bases/galaxy_v1beta1_galaxy_crd.yaml index b900d838..a0ac4c6f 100644 --- a/config/crd/bases/galaxy_v1beta1_galaxy_crd.yaml +++ b/config/crd/bases/galaxy_v1beta1_galaxy_crd.yaml @@ -139,6 +139,12 @@ spec: type: array items: type: string + postgres_data_volume_init: + description: Sets permissions on the /var/lib/pgsql/data for postgres container using an init container (not Openshift) + type: boolean + postgres_init_container_commands: + description: Customize the postgres init container commands (Non Openshift) + type: string postgres_migrant_configuration_secret: description: Secret where the old database configuration can be found for data migration type: string diff --git a/config/manifests/bases/galaxy-operator.clusterserviceversion.yaml 
b/config/manifests/bases/galaxy-operator.clusterserviceversion.yaml index c7a602d2..987eea59 100644 --- a/config/manifests/bases/galaxy-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/galaxy-operator.clusterserviceversion.yaml @@ -114,6 +114,15 @@ spec: path: postgres_tolerations x-descriptors: - urn:alm:descriptor:com.tectonic.ui:advanced + - description: Sets permissions on the /var/lib/pgsql/data for postgres container using an init container (not Openshift) + displayName: PostgreSQL initialize data volume + path: postgres_data_volume_init + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:hidden + - description: Customize the postgres init container commands (Non Openshift) + displayName: PostgreSQL Init Container Commands + path: postgres_init_container_commands + x-descriptors: - urn:alm:descriptor:com.tectonic.ui:hidden - displayName: Database storage class path: postgres_storage_class diff --git a/roles/postgres/defaults/main.yml b/roles/postgres/defaults/main.yml index f47ef628..aef08f72 100644 --- a/roles/postgres/defaults/main.yml +++ b/roles/postgres/defaults/main.yml @@ -33,6 +33,11 @@ postgres_tolerations: '' # Define postgres configuration arguments to use postgres_extra_args: '' +postgres_data_volume_init: false +postgres_init_container_commands: | + chown 26:0 /var/lib/pgsql/data + chmod 700 /var/lib/pgsql/data + custom_resource_key: '_galaxy_ansible_com_galaxy' database_status_present: false diff --git a/roles/postgres/templates/postgres.yaml.j2 b/roles/postgres/templates/postgres.yaml.j2 index 26a6b13b..c20d5ffb 100644 --- a/roles/postgres/templates/postgres.yaml.j2 +++ b/roles/postgres/templates/postgres.yaml.j2 @@ -45,6 +45,24 @@ spec: nodeAffinity: {{ _node_affinity }} {% endif %} serviceAccountName: '{{ ansible_operator_meta.name }}' +{% if postgres_data_volume_init and not is_openshift %} + initContainers: + - name: init + image: '{{ _postgres_image }}' + imagePullPolicy: '{{ image_pull_policy }}' + securityContext: + 
runAsUser: 0 + command: + - /bin/sh + - -c + - | + {{ postgres_init_container_commands | indent(width=14) }} + resources: {{ postgres_resource_requirements }} + volumeMounts: + - name: postgres-{{ supported_pg_version }} + mountPath: '{{ _postgres_data_path | dirname }}' + subPath: '{{ _postgres_data_path | dirname | basename }}' +{% endif %} containers: - image: '{{ _postgres_image }}' name: postgres From 0cf23d0913a2d48b1a0376ec81c82d0e95656609 Mon Sep 17 00:00:00 2001 From: "Christian M. Adams" Date: Wed, 3 Apr 2024 16:37:46 -0400 Subject: [PATCH 17/27] Add database configuration docs --- docs/user-guide/database-configuration.md | 114 ++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 docs/user-guide/database-configuration.md diff --git a/docs/user-guide/database-configuration.md b/docs/user-guide/database-configuration.md new file mode 100644 index 00000000..09a8b440 --- /dev/null +++ b/docs/user-guide/database-configuration.md @@ -0,0 +1,114 @@ +### Database Configuration + +#### PostgreSQL Version + +The default PostgreSQL version for the version of Galaxy bundled with the latest version of the galaxy-operator is PostgreSQL 15. You can find this default for a given version by at the default value for [supported_pg_version](./roles/installer/vars/main.yml#L7). + +We only have coverage for the default version of PostgreSQL. Newer versions of PostgreSQL will likely work, but should only be configured as an external database. If your database is managed by the galaxy-operator (default if you don't specify a `postgres_configuration_secret`), then you should not override the default version as this may cause issues when the operator tries to upgrade your postgresql pod. + +#### External PostgreSQL Service + +To configure Galaxy to use an external database, the Custom Resource needs to know about the connection details. 
To do this, create a k8s secret with those connection details and specify the name of the secret as `postgres_configuration_secret` at the CR spec level. + + +The secret should be formatted as follows: + +```yaml +--- +apiVersion: v1 +kind: Secret +metadata: + name: -postgres-configuration + namespace: +stringData: + host: + port: + database: + username: + password: + sslmode: prefer + type: unmanaged +type: Opaque +``` + +> Please ensure that the value for the variable `password` should _not_ contain single or double quotes (`'`, `"`) or backslashes (`\`) to avoid any issues during deployment, [backup](./roles/backup) or [restoration](./roles/restore). + +> It is possible to set a specific username, password, port, or database, but still have the database managed by the operator. In this case, when creating the postgres-configuration secret, the `type: managed` field should be added. + +**Note**: The variable `sslmode` is valid for `external` databases only. The allowed values are: `prefer`, `disable`, `allow`, `require`, `verify-ca`, `verify-full`. + +Once the secret is created, you can specify it on your spec: + +```yaml +--- +spec: + ... + postgres_configuration_secret: +``` + +#### Managed PostgreSQL Service + +If you don't have access to an external PostgreSQL service, the galaxy-operator can deploy one for you alongside the Galaxy instance itself.
+ +The following variables are customizable for the managed PostgreSQL service + +| Name | Description | Default | +| --------------------------------------------- | --------------------------------------------------------------- | --------------------------------------- | +| postgres_image | Path of the image to pull | quay.io/sclorg/postgresql-15-c9s | +| postgres_image_version | Image version to pull | latest | +| postgres_resource_requirements | PostgreSQL container (and initContainer) resource requirements | requests: {cpu: 10m, memory: 64Mi} | +| postgres_storage_requirements | PostgreSQL container storage requirements | requests: {storage: 8Gi} | +| postgres_storage_class | PostgreSQL PV storage class | Empty string | + +Example of customization could be: + +```yaml +--- +spec: + ... + postgres_resource_requirements: + requests: + cpu: 500m + memory: 2Gi + limits: + cpu: '1' + memory: 4Gi + postgres_storage_requirements: + requests: + storage: 8Gi + limits: + storage: 50Gi + postgres_storage_class: fast-ssd + postgres_extra_args: + - '-c' + - 'max_connections=1000' +``` + +**Note**: If `postgres_storage_class` is not defined, PostgreSQL will store its data on a volume using the default storage class for your cluster. + +#### Note about overriding the postgres image + +We recommend you use the default sclorg image. If you are coming from a deployment using the old postgres image from dockerhub (postgres:13), upgrading from galaxy-operator version 2024.02.29 to a newer version will handle migrating your data to the new postgresql image (postgresql-15-c9s). + +You can no longer configure a custom `postgres_data_path` because it is hardcoded in the quay.io/sclorg/postgresql-15-c9s image. + +If you override the postgres image to use a custom postgres image like postgres:15 for example, the default data directory path may be different. These images cannot be used interchangeably.
+ +#### Initialize Postgres data volume + +When using a hostPath backed PVC and some other storage classes like longhorn storage, the postgres data directory needs to be accessible by the user in the postgres pod (UID 26). + +To initialize this directory with the correct permissions, configure the following setting, which will use an init container to set the permissions in the postgres volume. + +```yaml +spec: + postgres_data_volume_init: true +``` + +Should you need to modify the init container commands, there is an example below. + +```yaml +postgres_init_container_commands: | + chown 26:0 /var/lib/pgsql/data + chmod 700 /var/lib/pgsql/data +``` From 266cc7fa587a68b3327ef41d071da2f95285b463 Mon Sep 17 00:00:00 2001 From: "Christian M. Adams" Date: Wed, 3 Apr 2024 16:40:28 -0400 Subject: [PATCH 18/27] Refactor backup content logic into k8s job to enable custom securityContext - Set runAsUser to 1000 (galaxy user) for management pods in k8s so that it can access the contents of /var/lib/pulp - Add initContainer for copying content from /var/lib/pulp during backups - Add separate k8s job for copying content for file storage during restores - add rbac rules for cronjobs and jobs to operator SA - In k8s, set content pod user to 1000 like in the api deployment - Set UID 1000 in k8s for backup and restore management pods - Add a ttl for the k8s jobs so that the content PVC can be deleted if desired without ownerReference issues --- .github/workflows/pr.yml | 16 +++--- config/rbac/role.yaml | 13 +++++ docs/build.md | 4 +- docs/quickstart.md | 4 +- docs/user-guide/database-configuration.md | 4 +- roles/backup/tasks/init.yml | 22 ++++++++ roles/backup/tasks/main.yml | 9 +++- roles/backup/tasks/postgres.yml | 15 ------ roles/backup/tasks/remove_management_pod.yml | 8 +++ roles/backup/tasks/storage.yml | 21 -------- .../templates/backup-content-k8s-job.yaml.j2 | 50 +++++++++++++++++++ roles/backup/templates/management-pod.yaml.j2 | 11 +++- roles/backup/vars/main.yml | 3
++ .../galaxy-content.deployment.yaml.j2 | 4 +- roles/postgres/templates/postgres.yaml.j2 | 2 +- roles/restore/tasks/cleanup.yml | 8 --- roles/restore/tasks/init.yml | 14 ++++++ roles/restore/tasks/main.yml | 9 +++- roles/restore/tasks/postgres.yml | 2 +- roles/restore/tasks/remove_management_pod.yml | 8 +++ roles/restore/tasks/storage.yml | 29 ----------- .../restore/templates/management-pod.yaml.j2 | 10 +++- .../templates/restore-content-k8s-job.yaml.j2 | 50 +++++++++++++++++++ roles/restore/vars/main.yml | 3 ++ 24 files changed, 226 insertions(+), 93 deletions(-) create mode 100644 roles/backup/tasks/remove_management_pod.yml delete mode 100644 roles/backup/tasks/storage.yml create mode 100644 roles/backup/templates/backup-content-k8s-job.yaml.j2 create mode 100644 roles/restore/tasks/remove_management_pod.yml delete mode 100644 roles/restore/tasks/storage.yml create mode 100644 roles/restore/templates/restore-content-k8s-job.yaml.j2 diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 651532e7..90e46ce2 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -14,12 +14,12 @@ jobs: include: - STORAGE: filesystem IMAGE: minimal - - STORAGE: filesystem - IMAGE: s6 - - STORAGE: azure - IMAGE: minimal - - STORAGE: s3 - IMAGE: minimal + # - STORAGE: filesystem + # IMAGE: s6 + # - STORAGE: azure + # IMAGE: minimal + # - STORAGE: s3 + # IMAGE: minimal steps: - name: PR head repo id: head_repo_name @@ -122,6 +122,10 @@ jobs: run: sudo -E .ci/scripts/galaxy_ng-tests.sh -m shell: bash + # # TODO: Remove after testing + # - name: Setup tmate session + # uses: mxschmitt/action-tmate@v3 + - name: Backup & Restore run: CI_TEST=galaxy .ci/scripts/backup_and_restore.sh -m shell: bash diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index a661d38a..7bcf74b4 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -91,6 +91,19 @@ rules: verbs: - patch - get + - apiGroups: + - batch + resources: + - cronjobs + - jobs + verbs: + 
- get + - list + - create + - patch + - update + - watch + ## ## Rules for galaxy.ansible.com/v1beta1, Kind: Galaxy ## diff --git a/docs/build.md b/docs/build.md index 1c9a596b..a52f1455 100644 --- a/docs/build.md +++ b/docs/build.md @@ -127,7 +127,7 @@ pod/galaxy-api-5d4d945787-jq2kk 1/1 Running 0 pod/galaxy-content-754466b885-8c85x 1/1 Running 0 2m41s pod/galaxy-content-754466b885-bgzz7 1/1 Running 0 2m41s pod/galaxy-operator-controller-manager-d84cd6d4c-2zpw5 2/2 Running 0 3m55s -pod/galaxy-postgres-13-0 1/1 Running 0 3m +pod/galaxy-postgres-15-0 1/1 Running 0 3m pod/galaxy-redis-b77c7ccb-zqdv6 1/1 Running 0 2m30s pod/galaxy-web-dc44cff56-k46j2 1/1 Running 0 2m53s pod/galaxy-worker-64f7889dd7-t5jdd 1/1 Running 0 2m36s @@ -154,4 +154,4 @@ Password: ``` You can now access Galaxy in your browser by visiting [http://localhost](http://localhost) or [https://localhost](https://localhost): image -image \ No newline at end of file +image diff --git a/docs/quickstart.md b/docs/quickstart.md index eb5abf46..45b2c793 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -83,7 +83,7 @@ galaxy-api-5d4d945787-jtc59 1/1 Running 0 galaxy-content-754466b885-cmmp4 1/1 Running 0 3m23s galaxy-content-754466b885-zsfqq 1/1 Running 0 3m23s galaxy-operator-controller-manager-5f75d85bf8-j49mr 2/2 Running 0 4m21s -galaxy-postgres-13-0 1/1 Running 0 3m40s +galaxy-postgres-15-0 1/1 Running 0 3m40s galaxy-redis-994cbcbff-9rf55 1/1 Running 0 3m11s galaxy-web-dc44cff56-lmshc 1/1 Running 0 3m33s galaxy-worker-64f7889dd7-9lvkm 1/1 Running 0 3m17s @@ -111,4 +111,4 @@ Password: ``` You can now access Galaxy in your browser by visiting [http://localhost](http://localhost) or [https://localhost](https://localhost): image -image \ No newline at end of file +image diff --git a/docs/user-guide/database-configuration.md b/docs/user-guide/database-configuration.md index 09a8b440..ceb830b1 100644 --- a/docs/user-guide/database-configuration.md +++ b/docs/user-guide/database-configuration.md @@ -2,7 
+2,7 @@ #### PostgreSQL Version -The default PostgreSQL version for the version of Galaxy bundled with the latest version of the galaxy-operator is PostgreSQL 15. You can find this default for a given version by at the default value for [supported_pg_version](./roles/installer/vars/main.yml#L7). +The default PostgreSQL version for the version of Galaxy bundled with the latest version of the galaxy-operator is PostgreSQL 15. You can find this default for a given version by at the default value for [supported_pg_version](https://github.com/ansible/galaxy-operator/tree/main/roles/installer/vars/main.yml#L7). We only have coverage for the default version of PostgreSQL. Newer versions of PostgreSQL will likely work, but should only be configured as an external database. If your database is managed by the galaxy-operator (default if you don't specify a `postgres_configuration_secret`), then you should not override the default version as this may cause issues when the operator tries to upgrade your postgresql pod. @@ -31,7 +31,7 @@ stringData: type: Opaque ``` -> Please ensure that the value for the variable `password` should _not_ contain single or double quotes (`'`, `"`) or backslashes (`\`) to avoid any issues during deployment, [backup](./roles/backup) or [restoration](./roles/restore). +> Please ensure that the value for the variable `password` should _not_ contain single or double quotes (`'`, `"`) or backslashes (`\`) to avoid any issues during deployment, [backup](https://github.com/ansible/galaxy-operator/tree/main/roles/backup) or [restoration](https://github.com/ansible/galaxy-operator/tree/main/roles/restore). > It is possible to set a specific username, password, port, or database, but still have the database managed by the operator. In this case, when creating the postgres-configuration secret, the `type: managed` field should be added. 
diff --git a/roles/backup/tasks/init.yml b/roles/backup/tasks/init.yml index c096ba20..a8393317 100644 --- a/roles/backup/tasks/init.yml +++ b/roles/backup/tasks/init.yml @@ -178,8 +178,30 @@ set_fact: _postgres_image: "{{ _custom_postgres_image | default(lookup('env', 'RELATED_IMAGE_GALAXY_POSTGRES')) | default(_postgres_image, true) }}" +- name: Determine the timestamp for the backup once for all nodes + set_fact: + now: '{{ lookup("pipe", "date +%F-%H%M%S") }}' + +- name: Set backup directory name + set_fact: + _backup_dir: "/backups/openshift-backup-{{ now }}" + - name: Create management pod from templated deployment config k8s: state: present definition: "{{ lookup('template', 'templates/management-pod.yaml.j2') | from_yaml }}" wait: true + +- name: Wait for backup management pod to be running + k8s_info: + kind: Pod + namespace: "{{ ansible_operator_meta.namespace }}" + label_selectors: + - "app.kubernetes.io/component=backup-manager" + - "app.kubernetes.io/instance={{ deployment_type }}-backup-manager-{{ ansible_operator_meta.name }}" + register: backup_pod_info + until: + - backup_pod_info['resources'] | length + - backup_pod_info['resources'][0]['status']['phase'] == 'Running' + retries: 120 + delay: 10 diff --git a/roles/backup/tasks/main.yml b/roles/backup/tasks/main.yml index 5d4b793b..f19b5b63 100644 --- a/roles/backup/tasks/main.yml +++ b/roles/backup/tasks/main.yml @@ -6,13 +6,20 @@ custom_resource_status: "{{ hostvars[inventory_hostname][custom_resource_key]['status'] }}" - block: + - include_tasks: ../../common/tasks/check_k8s_or_openshift.yml + - include_tasks: init.yml - include_tasks: postgres.yml - include_tasks: secrets.yml - - include_tasks: storage.yml + - include_tasks: remove_management_pod.yml + + - name: Apply kubernetes job to backup /var/lib/pulp content + k8s: + state: present + definition: "{{ lookup('template', 'backup-content-k8s-job.yaml.j2') }}" when: storage_type | lower == 'file' - name: Set flag signifying this backup was 
successful diff --git a/roles/backup/tasks/postgres.yml b/roles/backup/tasks/postgres.yml index 7a03bb54..8d230ff1 100644 --- a/roles/backup/tasks/postgres.yml +++ b/roles/backup/tasks/postgres.yml @@ -55,21 +55,6 @@ postgres_pod_name: "{{ postgres_pod['resources'][0]['metadata']['name'] }}" when: postgres_type == 'managed' -- name: Determine the timestamp for the backup once for all nodes - set_fact: - now: '{{ lookup("pipe", "date +%F-%H%M%S") }}' - -- name: Set backup directory name - set_fact: - _backup_dir: "/backups/openshift-backup-{{ now }}" - -- name: Create directory for backup - k8s_exec: - namespace: "{{ backup_pvc_namespace }}" - pod: "{{ ansible_operator_meta.name }}-backup-manager" - command: >- - mkdir -p {{ _backup_dir }} - - name: Precreate file for database dump k8s_exec: namespace: "{{ backup_pvc_namespace}}" diff --git a/roles/backup/tasks/remove_management_pod.yml b/roles/backup/tasks/remove_management_pod.yml new file mode 100644 index 00000000..31129677 --- /dev/null +++ b/roles/backup/tasks/remove_management_pod.yml @@ -0,0 +1,8 @@ +--- +- name: Delete any existing management pod + k8s: + name: "{{ ansible_operator_meta.name }}-backup-manager" + kind: Pod + namespace: "{{ backup_pvc_namespace }}" + state: absent + force: true diff --git a/roles/backup/tasks/storage.yml b/roles/backup/tasks/storage.yml deleted file mode 100644 index a3a4cf48..00000000 --- a/roles/backup/tasks/storage.yml +++ /dev/null @@ -1,21 +0,0 @@ ---- - -- name: Create directory for backup content files - k8s_exec: - namespace: "{{ backup_pvc_namespace }}" - pod: "{{ ansible_operator_meta.name }}-backup-manager" - command: >- - mkdir -p {{ _backup_dir }}/pulp/ - -- name: Set file copy command - set_fact: - copy_cmd: >- - cp -fr /var/lib/pulp/. 
{{ _backup_dir }}/pulp - -- name: Write content files to backup on PVC - k8s_exec: - namespace: "{{ backup_pvc_namespace }}" - pod: "{{ ansible_operator_meta.name }}-backup-manager" - command: >- - bash -c "{{ copy_cmd }}" - register: file_copy diff --git a/roles/backup/templates/backup-content-k8s-job.yaml.j2 b/roles/backup/templates/backup-content-k8s-job.yaml.j2 new file mode 100644 index 00000000..b8e7fbf8 --- /dev/null +++ b/roles/backup/templates/backup-content-k8s-job.yaml.j2 @@ -0,0 +1,50 @@ +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ ansible_operator_meta.name }}-backup-content + namespace: {{ backup_pvc_namespace }} + labels: + app.kubernetes.io/name: '{{ deployment_type }}-backup-content' + app.kubernetes.io/instance: '{{ deployment_type }}-backup-content-{{ ansible_operator_meta.name }}' + app.kubernetes.io/component: backup-content + app.kubernetes.io/part-of: '{{ deployment_type }}' + app.kubernetes.io/managed-by: '{{ deployment_type }}-operator' +spec: + ttlSecondsAfterFinished: 60 + template: + metadata: + annotations: + spec: +{% if is_k8s %} + securityContext: + runAsUser: 1000 +{% endif %} + containers: + - name: backup-content + image: quay.io/ansible/galaxy-ng:latest # TODO: Add set image tasks, and var ref here, etc. + imagePullPolicy: Always + command: + - /bin/bash + - -c + - | + mkdir -p {{ _backup_dir }}/pulp/ + cp -fr /var/lib/pulp/. 
{{ _backup_dir }}/pulp + volumeMounts: + - name: {{ ansible_operator_meta.name }}-backup + mountPath: /backups +{% if storage_claim is defined %} + - name: file-storage + mountPath: "/var/lib/pulp" +{% endif %} + volumes: + - name: {{ ansible_operator_meta.name }}-backup + persistentVolumeClaim: + claimName: {{ backup_claim }} +{% if storage_claim is defined %} + - name: file-storage + persistentVolumeClaim: + claimName: {{ storage_claim }} +{% endif %} + restartPolicy: Never + backoffLimit: 0 diff --git a/roles/backup/templates/management-pod.yaml.j2 b/roles/backup/templates/management-pod.yaml.j2 index 437d903b..a8498420 100644 --- a/roles/backup/templates/management-pod.yaml.j2 +++ b/roles/backup/templates/management-pod.yaml.j2 @@ -11,11 +11,20 @@ metadata: app.kubernetes.io/part-of: '{{ deployment_type }}' app.kubernetes.io/managed-by: '{{ deployment_type }}-operator' spec: +{% if is_k8s %} + securityContext: + runAsUser: 1000 +{% endif %} containers: - name: {{ ansible_operator_meta.name }}-backup-manager image: "{{ _postgres_image }}" imagePullPolicy: Always - command: ["sleep", "infinity"] + command: + - /bin/bash + - -c + - | + mkdir -p {{ _backup_dir }}/ + sleep infinity volumeMounts: - name: {{ ansible_operator_meta.name }}-backup mountPath: /backups diff --git a/roles/backup/vars/main.yml b/roles/backup/vars/main.yml index 4f11ce13..935f52c2 100644 --- a/roles/backup/vars/main.yml +++ b/roles/backup/vars/main.yml @@ -3,3 +3,6 @@ deployment_type: "galaxy" _postgres_image: quay.io/sclorg/postgresql-15-c9s:latest supported_pg_version: 15 + +is_k8s: false +is_openshift: false diff --git a/roles/galaxy-content/templates/galaxy-content.deployment.yaml.j2 b/roles/galaxy-content/templates/galaxy-content.deployment.yaml.j2 index 2dfa9d22..639f0f69 100644 --- a/roles/galaxy-content/templates/galaxy-content.deployment.yaml.j2 +++ b/roles/galaxy-content/templates/galaxy-content.deployment.yaml.j2 @@ -48,8 +48,8 @@ spec: {% endif %} {% if is_k8s %} securityContext: 
- runAsUser: 700 - fsGroup: 700 + runAsUser: 1000 + fsGroup: 1000 {% endif %} {% if node_selector %} nodeSelector: diff --git a/roles/postgres/templates/postgres.yaml.j2 b/roles/postgres/templates/postgres.yaml.j2 index c20d5ffb..79f48bf6 100644 --- a/roles/postgres/templates/postgres.yaml.j2 +++ b/roles/postgres/templates/postgres.yaml.j2 @@ -59,7 +59,7 @@ spec: {{ postgres_init_container_commands | indent(width=14) }} resources: {{ postgres_resource_requirements }} volumeMounts: - - name: postgres-{{ supported_pg_version }} + - name: postgres mountPath: '{{ _postgres_data_path | dirname }}' subPath: '{{ _postgres_data_path | dirname | basename }}' {% endif %} diff --git a/roles/restore/tasks/cleanup.yml b/roles/restore/tasks/cleanup.yml index 3f847a52..b1a0a89f 100644 --- a/roles/restore/tasks/cleanup.yml +++ b/roles/restore/tasks/cleanup.yml @@ -1,13 +1,5 @@ --- -- name: Delete any existing management pod - k8s: - name: "{{ ansible_operator_meta.name }}-backup-manager" - kind: Pod - namespace: "{{ backup_pvc_namespace }}" - state: absent - force: true - - name: Remove ownerReferences from admin password secret to avoid garbage collection k8s: definition: diff --git a/roles/restore/tasks/init.yml b/roles/restore/tasks/init.yml index 0049fab0..18db56d2 100644 --- a/roles/restore/tasks/init.yml +++ b/roles/restore/tasks/init.yml @@ -167,6 +167,20 @@ definition: "{{ lookup('template', 'templates/management-pod.yaml.j2') | from_yaml }}" wait: true +- name: Wait for restore management pod to be running + k8s_info: + kind: Pod + namespace: "{{ ansible_operator_meta.namespace }}" + label_selectors: + - "app.kubernetes.io/component=backup-manager" + - "app.kubernetes.io/instance={{ deployment_type }}-backup-manager-{{ ansible_operator_meta.name }}" + register: restore_pod_info + until: + - restore_pod_info['resources'] | length + - restore_pod_info['resources'][0]['status']['phase'] == 'Running' + retries: 120 + delay: 10 + - name: Check to make sure backup directory 
exists on PVC k8s_exec: namespace: "{{ backup_pvc_namespace}}" diff --git a/roles/restore/tasks/main.yml b/roles/restore/tasks/main.yml index 2c70974a..b68a25a5 100644 --- a/roles/restore/tasks/main.yml +++ b/roles/restore/tasks/main.yml @@ -5,6 +5,8 @@ custom_resource_status: "{{ hostvars[inventory_hostname][custom_resource_key]['status'] | default({}) }}" - block: + - include_tasks: ../../common/tasks/check_k8s_or_openshift.yml + - include_tasks: init.yml - include_tasks: secrets.yml @@ -13,7 +15,12 @@ - include_tasks: postgres.yml - - include_tasks: storage.yml + - include_tasks: remove_management_pod.yml + + - name: Apply kubernetes job to restore /var/lib/pulp content + k8s: + state: present + definition: "{{ lookup('template', 'restore-content-k8s-job.yaml.j2') }}" when: storage_type | lower == 'file' - name: Set flag signifying this restore was successful diff --git a/roles/restore/tasks/postgres.yml b/roles/restore/tasks/postgres.yml index 14b2c9dc..743d7873 100644 --- a/roles/restore/tasks/postgres.yml +++ b/roles/restore/tasks/postgres.yml @@ -83,7 +83,7 @@ when: storage_type | lower == 'file' - name: Scale down deployments for migration - include: scale_down_deployments.yml + include_tasks: scale_down_deployments.yml - name: Set full resolvable host name for postgres pod set_fact: diff --git a/roles/restore/tasks/remove_management_pod.yml b/roles/restore/tasks/remove_management_pod.yml new file mode 100644 index 00000000..31129677 --- /dev/null +++ b/roles/restore/tasks/remove_management_pod.yml @@ -0,0 +1,8 @@ +--- +- name: Delete any existing management pod + k8s: + name: "{{ ansible_operator_meta.name }}-backup-manager" + kind: Pod + namespace: "{{ backup_pvc_namespace }}" + state: absent + force: true diff --git a/roles/restore/tasks/storage.yml b/roles/restore/tasks/storage.yml deleted file mode 100644 index 5f1137d3..00000000 --- a/roles/restore/tasks/storage.yml +++ /dev/null @@ -1,29 +0,0 @@ ---- - -- name: Delete any existing management pod - 
k8s: - name: "{{ ansible_operator_meta.name }}-backup-manager" - kind: Pod - namespace: "{{ backup_pvc_namespace }}" - state: absent - force: true - wait: true - -- name: Create management pod from templated deployment config - k8s: - state: present - definition: "{{ lookup('template', 'templates/management-pod.yaml.j2') | from_yaml }}" - wait: true - -- name: Set file copy command - set_fact: - copy_cmd: >- - cp -fr {{ backup_dir }}/pulp/. /var/lib/pulp - -- name: Write content files from backup on PVC - k8s_exec: - namespace: "{{ backup_pvc_namespace }}" - pod: "{{ ansible_operator_meta.name }}-backup-manager" - command: >- - bash -c "{{ copy_cmd }}" - register: file_copy diff --git a/roles/restore/templates/management-pod.yaml.j2 b/roles/restore/templates/management-pod.yaml.j2 index 183c65ae..310c3dce 100644 --- a/roles/restore/templates/management-pod.yaml.j2 +++ b/roles/restore/templates/management-pod.yaml.j2 @@ -12,10 +12,18 @@ metadata: app.kubernetes.io/part-of: '{{ deployment_type }}' app.kubernetes.io/managed-by: '{{ deployment_type }}-operator' spec: +{% if is_k8s %} + securityContext: + runAsUser: 1000 +{% endif %} containers: - name: {{ ansible_operator_meta.name }}-backup-manager image: "{{ _postgres_image }}" - command: ["sleep", "infinity"] + command: + - /bin/bash + - -c + - | + sleep infinity volumeMounts: - name: {{ ansible_operator_meta.name }}-backup mountPath: /backups diff --git a/roles/restore/templates/restore-content-k8s-job.yaml.j2 b/roles/restore/templates/restore-content-k8s-job.yaml.j2 new file mode 100644 index 00000000..77a9b94f --- /dev/null +++ b/roles/restore/templates/restore-content-k8s-job.yaml.j2 @@ -0,0 +1,50 @@ +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ ansible_operator_meta.name }}-restore-content + namespace: {{ backup_pvc_namespace }} + labels: + app.kubernetes.io/name: '{{ deployment_type }}-restore-content' + app.kubernetes.io/instance: '{{ deployment_type }}-restore-content-{{ 
ansible_operator_meta.name }}' + app.kubernetes.io/component: restore-content + app.kubernetes.io/part-of: '{{ deployment_type }}' + app.kubernetes.io/managed-by: '{{ deployment_type }}-operator' +spec: + ttlSecondsAfterFinished: 60 + template: + metadata: + annotations: + spec: +{% if is_k8s %} + securityContext: + runAsUser: 1000 +{% endif %} + containers: + - name: restore-content + image: {{ _postgres_image }} + imagePullPolicy: Always + command: + - /bin/bash + - -c + - | + stat {{ backup_dir }}/pulp/ + cp -fr {{ backup_dir }}/pulp/. /var/lib/pulp + volumeMounts: + - name: {{ ansible_operator_meta.name }}-backup + mountPath: /backups +{% if storage_claim is defined %} + - name: file-storage + mountPath: "/var/lib/pulp" +{% endif %} + volumes: + - name: {{ ansible_operator_meta.name }}-backup + persistentVolumeClaim: + claimName: {{ backup_pvc }} +{% if storage_claim is defined %} + - name: file-storage + persistentVolumeClaim: + claimName: {{ storage_claim }} +{% endif %} + restartPolicy: Never + backoffLimit: 0 diff --git a/roles/restore/vars/main.yml b/roles/restore/vars/main.yml index de4d7159..888a2786 100644 --- a/roles/restore/vars/main.yml +++ b/roles/restore/vars/main.yml @@ -11,3 +11,6 @@ backup_kind: 'GalaxyBackup' # If set to true, the restore process will delete the existing database and create a new one force_drop_db: false pg_drop_create: '' + +is_k8s: false +is_openshift: false From 8d5ca16a8a0a45190ec2faae3d15768b988cab31 Mon Sep 17 00:00:00 2001 From: "Christian M. Adams" Date: Thu, 18 Apr 2024 17:52:51 -0400 Subject: [PATCH 19/27] Turn off bash history expansion temporarily for restore * This makes fixes a bug where ! 
characters in the PGPASSWORD are not parsed correctly --- roles/restore/tasks/postgres.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/roles/restore/tasks/postgres.yml b/roles/restore/tasks/postgres.yml index 743d7873..15d23ea0 100644 --- a/roles/restore/tasks/postgres.yml +++ b/roles/restore/tasks/postgres.yml @@ -152,7 +152,9 @@ echo keepalive_pid: $keepalive_pid set -e -o pipefail {{ pg_drop_create }} + set +H # Disable history expansion cat {{ backup_dir }}/pulp.db | PGPASSWORD='{{ postgres_pass }}' {{ pg_restore }} + set -H # Re-enable history expansion PG_RC=$? set +e +o pipefail exit $PG_RC From 1ec057a62ceccdcff5505163647bc577e52d4f17 Mon Sep 17 00:00:00 2001 From: "Christian M. Adams" Date: Thu, 18 Apr 2024 17:54:28 -0400 Subject: [PATCH 20/27] Add --ansible-log-events flag to Dockerfile to make it easier to change verbosity * Options are documented here: https://sdk.operatorframework.io/docs/building-operators/ansible/reference/advanced_options/#using-ansible-log-events --- Dockerfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index b6c7e043..94b961a0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,4 +22,8 @@ COPY watches.yaml ${HOME}/watches.yaml COPY roles/ ${HOME}/roles/ COPY playbooks/ ${HOME}/playbooks/ -ENTRYPOINT ["/tini", "--", "/usr/local/bin/ansible-operator", "run", "--watches-file=./watches.yaml", "--reconcile-period=0s"] +ENTRYPOINT ["/tini", "--", "/usr/local/bin/ansible-operator", "run", \ + "--watches-file=./watches.yaml", \ + "--reconcile-period=0s", \ + "--ansible-log-events=Tasks" \ +] From aedd5b621979e037e0ebab3c8a16010802778aa6 Mon Sep 17 00:00:00 2001 From: "Christian M. 
Adams" Date: Fri, 19 Apr 2024 00:54:43 -0400 Subject: [PATCH 21/27] Set new postgres configuration secret if managed database --- roles/restore/tasks/deploy_galaxy.yml | 13 ++++++++----- roles/restore/tasks/postgres.yml | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/roles/restore/tasks/deploy_galaxy.yml b/roles/restore/tasks/deploy_galaxy.yml index d42e574f..37049fb4 100644 --- a/roles/restore/tasks/deploy_galaxy.yml +++ b/roles/restore/tasks/deploy_galaxy.yml @@ -25,6 +25,14 @@ db_str: "postgres_configuration_secret: {{ db_secret_name }}" sso_str: "sso_secret: {{ sso_secret }}" +# TODO: Convert all of these tasks to use the combine filter +- name: Remove postgres_configuration_secret from spec + set_fact: + cr_object: "{{ cr_object | combine({'postgres_configuration_secret': db_str}, recursive=True) }}" + when: + - database_type is defined + - database_type == 'managed' + - name: Strip end characters from spec set_fact: cr_spec_strip: "{{ cr_object.stdout[:-2] }}" @@ -60,11 +68,6 @@ - not 'signing_secret' in cr_object.stdout - signing_secret != '' -- name: Set custom resource spec db from backup - set_fact: - cr_spec_strip: "{{ cr_spec_strip + ', ' + db_str }}" - when: not 'postgres_configuration_secret' in cr_object.stdout - - name: Set custom resource spec SSO from backup set_fact: cr_spec_strip: "{{ cr_spec_strip + ', ' + sso_str }}" diff --git a/roles/restore/tasks/postgres.yml b/roles/restore/tasks/postgres.yml index 15d23ea0..d57243f2 100644 --- a/roles/restore/tasks/postgres.yml +++ b/roles/restore/tasks/postgres.yml @@ -74,7 +74,7 @@ until: pvc_status.resources | length > 0 retries: 60 delay: 10 - ignore_errors: yes + ignore_errors: true - name: Fail with custom message if PVC not found fail: From b414529a957cd26307b4fa8a756d3e6a32020490 Mon Sep 17 00:00:00 2001 From: "Christian M. 
Adams" Date: Fri, 19 Apr 2024 12:10:05 -0400 Subject: [PATCH 22/27] Rewrite backup and restore cr_object handling to store in yaml, not invalid json * Refactor how cluster_name is set so that the period is excluded if set to an empty string. --- roles/backup/tasks/secrets.yml | 2 +- roles/restore/defaults/main.yml | 2 +- roles/restore/tasks/deploy_galaxy.yml | 51 +++------------------------ roles/restore/tasks/postgres.yml | 7 +++- 4 files changed, 12 insertions(+), 50 deletions(-) diff --git a/roles/backup/tasks/secrets.yml b/roles/backup/tasks/secrets.yml index f50a78d0..b1e2c77b 100644 --- a/roles/backup/tasks/secrets.yml +++ b/roles/backup/tasks/secrets.yml @@ -5,7 +5,7 @@ namespace: "{{ backup_pvc_namespace }}" pod: "{{ ansible_operator_meta.name }}-backup-manager" command: >- - bash -c "echo '{{ cr_spec }}' > {{ _backup_dir }}/cr_object" + bash -c "echo '{{ cr_spec | to_yaml }}' > {{ _backup_dir }}/cr_object" - name: Get admin_password k8s_info: diff --git a/roles/restore/defaults/main.yml b/roles/restore/defaults/main.yml index ca80df50..57a91fdb 100644 --- a/roles/restore/defaults/main.yml +++ b/roles/restore/defaults/main.yml @@ -18,4 +18,4 @@ db_fields_encryption_secret: '' sso_secret: '' # Default cluster name -cluster_name: 'cluster.local' +cluster_name: '' # On most clusters, this is 'cluster.local' diff --git a/roles/restore/tasks/deploy_galaxy.yml b/roles/restore/tasks/deploy_galaxy.yml index 37049fb4..ada26721 100644 --- a/roles/restore/tasks/deploy_galaxy.yml +++ b/roles/restore/tasks/deploy_galaxy.yml @@ -14,7 +14,7 @@ - name: Set custom resource spec variable from backup set_fact: - cr_spec: "{{ cr_object.stdout }}" + cr_spec_from_backup: "{{ cr_object.stdout }}" cr_spec_strip: "{ " admin_str: "admin_password_secret: {{ admin_password_name }}" storage_str: "object_storage_{{ storage_type | lower }}_secret: {{ storage_secret }}" @@ -22,62 +22,19 @@ container_token_str: "container_token_secret: {{ container_token_secret }}" db_fields_str: 
"db_fields_encryption_secret: {{ db_fields_encryption_secret }}" signing_str: "signing_secret: {{ signing_secret }}" - db_str: "postgres_configuration_secret: {{ db_secret_name }}" sso_str: "sso_secret: {{ sso_secret }}" -# TODO: Convert all of these tasks to use the combine filter - name: Remove postgres_configuration_secret from spec set_fact: - cr_object: "{{ cr_object | combine({'postgres_configuration_secret': db_str}, recursive=True) }}" + cr_spec_from_backup: "{{ (cr_spec_from_backup[:-2] | from_yaml) | combine({'postgres_configuration_secret': db_secret_name}, recursive=True) }}" when: - database_type is defined - database_type == 'managed' - -- name: Strip end characters from spec - set_fact: - cr_spec_strip: "{{ cr_object.stdout[:-2] }}" - when: - - cr_object.stdout | length > 1 - -- name: Set custom resource spec admin from backup - set_fact: - cr_spec_strip: "{{ cr_spec_strip + ', ' + admin_str }}" - when: not 'admin_password_secret' in cr_object.stdout - -- name: Set custom resource spec storage from backup - set_fact: - cr_spec_strip: "{{ cr_spec_strip + ', ' + storage_str }}" - when: - - not storage_key in cr_object.stdout - - storage_type | lower != 'file' - -- name: Set custom resource spec container token from backup - set_fact: - cr_spec_strip: "{{ cr_spec_strip + ', ' + container_token_str }}" - when: not 'container_token_secret' in cr_object.stdout - -- name: Set custom resource spec db encryption from backup - set_fact: - cr_spec_strip: "{{ cr_spec_strip + ', ' + db_fields_str }}" - when: not 'db_fields_encryption_secret' in cr_object.stdout - -- name: Set custom resource spec galaxy signing from backup - set_fact: - cr_spec_strip: "{{ cr_spec_strip + ', ' + signing_str }}" - when: - - not 'signing_secret' in cr_object.stdout - - signing_secret != '' - -- name: Set custom resource spec SSO from backup - set_fact: - cr_spec_strip: "{{ cr_spec_strip + ', ' + sso_str }}" - when: - - not 'sso_secret' in cr_object.stdout - - sso_secret != '' + - 
cr_spec_from_backup | length > 1 - name: Set custom resource spec from backup set_fact: - cr_spec: "{{ cr_spec_strip + '}\n' }}" + cr_spec: "{{ cr_spec_from_backup }}" - name: Deploy object k8s: diff --git a/roles/restore/tasks/postgres.yml b/roles/restore/tasks/postgres.yml index d57243f2..2b11f7af 100644 --- a/roles/restore/tasks/postgres.yml +++ b/roles/restore/tasks/postgres.yml @@ -85,9 +85,14 @@ - name: Scale down deployments for migration include_tasks: scale_down_deployments.yml +- name: Set _cluster_name with preceeding dot if it exists + set_fact: + _cluster_name: "{{ cluster_name | default('') | ternary('.' + cluster_name, '') }}" + no_log: "{{ no_log }}" + - name: Set full resolvable host name for postgres pod set_fact: - resolvable_db_host: '{{ (postgres_type == "managed") | ternary(postgres_host + "." + ansible_operator_meta.namespace + ".svc." + cluster_name, postgres_host) }}' # yamllint disable-line rule:line-length + resolvable_db_host: '{{ (postgres_type == "managed") | ternary(postgres_host + "." + ansible_operator_meta.namespace + ".svc" + _cluster_name, postgres_host) }}' # yamllint disable-line rule:line-length no_log: "{{ no_log }}" - name: Set pg_restore command From 697205abda1b84ef0c62f9f53df27c4b59569eca Mon Sep 17 00:00:00 2001 From: "Christian M. Adams" Date: Fri, 19 Apr 2024 17:50:32 -0400 Subject: [PATCH 23/27] Wait for the Postgres Service to be ready & create server secret earlier * If the content or worker pods come up too fast and the postgres service is not ready, we get errors in those containers and it crash loops. This resolves that. * Move all of the configuration and variables needed to create the galaxy-server secret to the common role. Prior to this, we would see errors when applying the content and worker deployments because the server-secret did not yet exist. * Before applying the content and worker deployments, check to make sure the galaxy-server secret exists. 
--- roles/common/defaults/main.yml | 27 ++++- roles/common/tasks/galaxy_server_secret.yml | 98 +++++++++++++++++++ .../tasks/get_node_ip.yml | 0 roles/common/tasks/main.yml | 3 + .../tasks/sso-configuration.yml | 0 .../templates/galaxy-server.secret.yaml.j2 | 0 roles/galaxy-api/defaults/main.yml | 25 ----- roles/galaxy-api/tasks/main.yml | 86 ---------------- roles/galaxy-content/tasks/main.yml | 11 +++ .../galaxy-content.deployment.yaml.j2 | 13 ++- roles/galaxy-worker/tasks/main.yml | 11 +++ .../galaxy-worker.deployment.yaml.j2 | 13 ++- roles/postgres/tasks/main.yml | 6 +- 13 files changed, 176 insertions(+), 117 deletions(-) create mode 100644 roles/common/tasks/galaxy_server_secret.yml rename roles/{galaxy-api => common}/tasks/get_node_ip.yml (100%) rename roles/{galaxy-api => common}/tasks/sso-configuration.yml (100%) rename roles/{galaxy-api => common}/templates/galaxy-server.secret.yaml.j2 (100%) diff --git a/roles/common/defaults/main.yml b/roles/common/defaults/main.yml index 2eb3a8f3..0e52feca 100644 --- a/roles/common/defaults/main.yml +++ b/roles/common/defaults/main.yml @@ -1,5 +1,4 @@ --- -route_host: '' image_pull_secret: '' image_pull_secrets: [] operator_service_account_name: '{{ lookup("env","OPERATOR_SA_NAME") | default("galaxy-operator-sa",true) }}' @@ -127,3 +126,29 @@ node_selector: '' # value: "Galaxy" # effect: "NoSchedule" tolerations: '' + +web_protocol: 'http' +ingress_type: none + +# Host to create the root with. +# If not specific will default to -- +# +route_host: '' + +hostname: '{{ deployment_type }}.example.com' + +# TLS secret for the ingress. The secret either has to exist before hand with +# the corresponding cert and key or just be an indicator for where an automated +# process like cert-manager (enabled via annotations) will store the TLS +# certificate and key. 
+ingress_tls_secret: '' + +# Secret to lookup that provide the TLS specific +# credentials to deploy +# +route_tls_secret: '' + + +# Set content host +content_host: '{{ ansible_operator_meta.name }}-content-svc' +content_port: '24816' diff --git a/roles/common/tasks/galaxy_server_secret.yml b/roles/common/tasks/galaxy_server_secret.yml new file mode 100644 index 00000000..656096e2 --- /dev/null +++ b/roles/common/tasks/galaxy_server_secret.yml @@ -0,0 +1,98 @@ +--- + +- name: Getting raw pulp_settings + set_fact: + raw_pulp_settings: "{{ raw_spec['pulp_settings'] | default({}) }}" + no_log: "{{ no_log }}" + when: pulp_settings is defined + +- name: Combining pulp_settings + set_fact: + pulp_combined_settings: "{{ default_settings|combine(raw_pulp_settings, recursive=True) if pulp_settings is defined and pulp_settings is not none else default_settings }}" + no_log: "{{ no_log }}" + +# Workaround being unable to do the following, for the subsequent task: +# when: (pulp_settings is not defined) or +# (pulp_settings.content_origin is not defined) +# (short-circuit evaluation only works for multiple separate when statements) +# https://github.com/ansible/ansible/issues/50554 +- name: Setting CONTENT_ORIGIN + set_fact: + content_origin_temp: pulp_settings.content_origin + when: + - pulp_settings is defined + - pulp_settings.content_origin is defined + +- name: Getting raw pulp_settings + set_fact: + raw_pulp_settings: "{{ raw_spec['pulp_settings'] | default({}) }}" + no_log: "{{ no_log }}" + when: pulp_settings is defined + +- name: Set default token authentication secret name if not set by user + set_fact: + container_token_secret: '{{ ansible_operator_meta.name }}-container-auth' + cacheable: yes + when: container_token_secret is not defined + +- include_tasks: + file: get_node_ip.yml + when: content_origin_temp is not defined + +- name: Set default CSRF_TRUSTED_ORIGINS to be used if not set by user + set_fact: + _trusted_origin_1: "http://{{ 
default_settings.content_origin.split('://')[1] }}" + _trusted_origin_2: "https://{{ default_settings.content_origin.split('://')[1] }}" + when: + - default_settings is defined + - default_settings.content_origin is defined + +- name: Set CSRF_TRUSTED_ORIGINS list based on content_origin + set_fact: + _csrf_trusted_origins: "{{ csrf_trusted_origins | default([]) + [_trusted_origin_1, _trusted_origin_2] }}" + when: + - default_settings is defined + - default_settings.content_origin is defined + +- name: Set default CSRF_TRUSTED_ORIGINS if not set by user + set_fact: + default_settings: "{{ default_settings | combine({'csrf_trusted_origins': _csrf_trusted_origins}) }}" + when: + - default_settings is defined + - default_settings.content_origin is defined + +- name: Combining pulp_settings + set_fact: + pulp_combined_settings: "{{ default_settings|combine(raw_pulp_settings, recursive=True) if pulp_settings is defined and pulp_settings is not none else default_settings }}" + cacheable: yes + no_log: "{{ no_log }}" + +- name: Include redis role + include_role: + name: redis + when: pulp_combined_settings.cache_enabled + +- include_tasks: + file: sso-configuration.yml + when: + - sso_secret is defined + - sso_secret | length + +- k8s_status: + api_version: "{{ api_version }}" + kind: "{{ kind }}" + name: "{{ ansible_operator_meta.name }}" + namespace: "{{ ansible_operator_meta.namespace }}" + conditions: + - type: "{{ deployment_type|capitalize }}-Config-Ready" + message: "Creating {{ ansible_operator_meta.name }}-server Secret resource" + reason: CreatingServerSecret + status: "False" + lastTransitionTime: "{{ lookup('pipe', 'date --iso-8601=seconds') }}" + +- name: galaxy-server secret + k8s: + state: "{{ deployment_state }}" + definition: "{{ lookup('template', 'templates/galaxy-server.secret.yaml.j2') | from_yaml }}" + register: galaxy_server_secret + no_log: "{{ no_log }}" diff --git a/roles/galaxy-api/tasks/get_node_ip.yml b/roles/common/tasks/get_node_ip.yml 
similarity index 100% rename from roles/galaxy-api/tasks/get_node_ip.yml rename to roles/common/tasks/get_node_ip.yml diff --git a/roles/common/tasks/main.yml b/roles/common/tasks/main.yml index 1a6193d2..f38c69ab 100644 --- a/roles/common/tasks/main.yml +++ b/roles/common/tasks/main.yml @@ -82,3 +82,6 @@ - name: Configure Object Storage include_tasks: object_storage_configuration.yml + +- name: Configure Galaxy Server Secret + include_tasks: galaxy_server_secret.yml diff --git a/roles/galaxy-api/tasks/sso-configuration.yml b/roles/common/tasks/sso-configuration.yml similarity index 100% rename from roles/galaxy-api/tasks/sso-configuration.yml rename to roles/common/tasks/sso-configuration.yml diff --git a/roles/galaxy-api/templates/galaxy-server.secret.yaml.j2 b/roles/common/templates/galaxy-server.secret.yaml.j2 similarity index 100% rename from roles/galaxy-api/templates/galaxy-server.secret.yaml.j2 rename to roles/common/templates/galaxy-server.secret.yaml.j2 diff --git a/roles/galaxy-api/defaults/main.yml b/roles/galaxy-api/defaults/main.yml index ec3d1eff..a1545028 100644 --- a/roles/galaxy-api/defaults/main.yml +++ b/roles/galaxy-api/defaults/main.yml @@ -3,31 +3,6 @@ # see: https://github.com/operator-framework/operator-sdk/issues/1770 raw_spec: "{{ vars['_galaxy_ansible_com_galaxy']['spec'] }}" -# Set content host -content_host: '{{ ansible_operator_meta.name }}-content-svc' -content_port: '24816' - -# Host to create the root with. -# If not specific will default to -- -# -route_host: '' - -hostname: '{{ deployment_type }}.example.com' - -web_protocol: 'http' - -ingress_type: none -# TLS secret for the ingress. The secret either has to exist before hand with -# the corresponding cert and key or just be an indicator for where an automated -# process like cert-manager (enabled via annotations) will store the TLS -# certificate and key. 
-ingress_tls_secret: '' - -# Secret to lookup that provide the TLS specific -# credentials to deploy -# -route_tls_secret: '' - container_auth_public_key_name: 'container_auth_public_key.pem' container_auth_private_key_name: 'container_auth_private_key.pem' diff --git a/roles/galaxy-api/tasks/main.yml b/roles/galaxy-api/tasks/main.yml index d9df646e..eefcec42 100644 --- a/roles/galaxy-api/tasks/main.yml +++ b/roles/galaxy-api/tasks/main.yml @@ -1,90 +1,4 @@ --- -# Workaround being unable to do the following, for the subsequent task: -# when: (pulp_settings is not defined) or -# (pulp_settings.content_origin is not defined) -# (short-circuit evaluation only works for multiple separate when statements) -# https://github.com/ansible/ansible/issues/50554 -- name: Setting CONTENT_ORIGIN - set_fact: - content_origin_temp: pulp_settings.content_origin - when: - - pulp_settings is defined - - pulp_settings.content_origin is defined - -- name: Getting raw pulp_settings - set_fact: - raw_pulp_settings: "{{ raw_spec['pulp_settings'] | default({}) }}" - no_log: "{{ no_log }}" - when: pulp_settings is defined - -- name: Set default token authentication secret name if not set by user - set_fact: - container_token_secret: '{{ ansible_operator_meta.name }}-container-auth' - cacheable: yes - when: container_token_secret is not defined - -- include_tasks: - file: get_node_ip.yml - when: content_origin_temp is not defined - -- name: Set default CSRF_TRUSTED_ORIGINS to be used if not set by user - set_fact: - _trusted_origin_1: "http://{{ default_settings.content_origin.split('://')[1] }}" - _trusted_origin_2: "https://{{ default_settings.content_origin.split('://')[1] }}" - when: - - default_settings is defined - - default_settings.content_origin is defined - -- name: Set CSRF_TRUSTED_ORIGINS list based on content_origin - set_fact: - _csrf_trusted_origins: "{{ csrf_trusted_origins | default([]) + [_trusted_origin_1, _trusted_origin_2] }}" - when: - - default_settings is defined - - 
default_settings.content_origin is defined - -- name: Set default CSRF_TRUSTED_ORIGINS if not set by user - set_fact: - default_settings: "{{ default_settings | combine({'csrf_trusted_origins': _csrf_trusted_origins}) }}" - when: - - default_settings is defined - - default_settings.content_origin is defined - -- name: Combining pulp_settings - set_fact: - pulp_combined_settings: "{{ default_settings|combine(raw_pulp_settings, recursive=True) if pulp_settings is defined and pulp_settings is not none else default_settings }}" - cacheable: yes - no_log: "{{ no_log }}" - -- name: Include redis role - include_role: - name: redis - when: pulp_combined_settings.cache_enabled - -- include_tasks: - file: sso-configuration.yml - when: - - sso_secret is defined - - sso_secret | length - -- k8s_status: - api_version: "{{ api_version }}" - kind: "{{ kind }}" - name: "{{ ansible_operator_meta.name }}" - namespace: "{{ ansible_operator_meta.namespace }}" - conditions: - - type: "{{ deployment_type|capitalize }}-API-Ready" - message: "Creating {{ ansible_operator_meta.name }}-server Secret resource" - reason: CreatingServerSecret - status: "False" - lastTransitionTime: "{{ lookup('pipe', 'date --iso-8601=seconds') }}" - -- name: galaxy-server secret - k8s: - state: "{{ deployment_state }}" - definition: "{{ lookup('template', 'templates/' + item + '.secret.yaml.j2') | from_yaml }}" - with_items: - - galaxy-server - no_log: "{{ no_log }}" - k8s_status: api_version: "{{ api_version }}" diff --git a/roles/galaxy-content/tasks/main.yml b/roles/galaxy-content/tasks/main.yml index 42945a09..1f299bfc 100644 --- a/roles/galaxy-content/tasks/main.yml +++ b/roles/galaxy-content/tasks/main.yml @@ -53,6 +53,17 @@ status: "False" lastTransitionTime: "{{ lookup('pipe', 'date --iso-8601=seconds') }}" +- name: Wait for {{ ansible_operator_meta.name }}-server secret to be created + k8s: + definition: + apiVersion: v1 + kind: Secret + metadata: + name: "{{ ansible_operator_meta.name }}-server" + 
namespace: "{{ ansible_operator_meta.namespace }}" + wait: yes + wait_timeout: 300 + - name: galaxy-content deployment k8s: state: "{{ deployment_state }}" diff --git a/roles/galaxy-content/templates/galaxy-content.deployment.yaml.j2 b/roles/galaxy-content/templates/galaxy-content.deployment.yaml.j2 index 639f0f69..c2be986c 100644 --- a/roles/galaxy-content/templates/galaxy-content.deployment.yaml.j2 +++ b/roles/galaxy-content/templates/galaxy-content.deployment.yaml.j2 @@ -112,11 +112,22 @@ spec: image: "{{ _image }}" imagePullPolicy: "{{ image_pull_policy }}" command: - - /bin/sh + - /bin/bash - -c - | wait-for-migrations resources: {{ combined_content.resource_requirements }} + env: + - name: POSTGRES_SERVICE_HOST + valueFrom: + secretKeyRef: + name: {{ _postgres_configuration_secret }} + key: host + - name: POSTGRES_SERVICE_PORT + valueFrom: + secretKeyRef: + name: {{ _postgres_configuration_secret }} + key: port volumeMounts: - name: {{ ansible_operator_meta.name }}-server mountPath: "/etc/pulp/settings.py" diff --git a/roles/galaxy-worker/tasks/main.yml b/roles/galaxy-worker/tasks/main.yml index fa97130e..6355d33f 100644 --- a/roles/galaxy-worker/tasks/main.yml +++ b/roles/galaxy-worker/tasks/main.yml @@ -43,6 +43,17 @@ status: "False" lastTransitionTime: "{{ lookup('pipe', 'date --iso-8601=seconds') }}" +- name: Wait for {{ ansible_operator_meta.name }}-server secret to be created + k8s: + definition: + apiVersion: v1 + kind: Secret + metadata: + name: "{{ ansible_operator_meta.name }}-server" + namespace: "{{ ansible_operator_meta.namespace }}" + wait: yes + wait_timeout: 300 + - name: galaxy-worker deployment k8s: state: "{{ deployment_state }}" diff --git a/roles/galaxy-worker/templates/galaxy-worker.deployment.yaml.j2 b/roles/galaxy-worker/templates/galaxy-worker.deployment.yaml.j2 index e66d151f..31d52d3e 100644 --- a/roles/galaxy-worker/templates/galaxy-worker.deployment.yaml.j2 +++ b/roles/galaxy-worker/templates/galaxy-worker.deployment.yaml.j2 @@ 
-195,11 +195,22 @@ spec: image: "{{ _image }}" imagePullPolicy: "{{ image_pull_policy }}" command: - - /bin/sh + - /bin/bash - -c - | wait-for-migrations resources: {{ combined_worker.resource_requirements }} + env: + - name: POSTGRES_SERVICE_HOST + valueFrom: + secretKeyRef: + name: {{ _postgres_configuration_secret }} + key: host + - name: POSTGRES_SERVICE_PORT + valueFrom: + secretKeyRef: + name: {{ _postgres_configuration_secret }} + key: port volumeMounts: - name: {{ ansible_operator_meta.name }}-server mountPath: "/etc/pulp/settings.py" diff --git a/roles/postgres/tasks/main.yml b/roles/postgres/tasks/main.yml index 32d1effa..77bb9b91 100644 --- a/roles/postgres/tasks/main.yml +++ b/roles/postgres/tasks/main.yml @@ -148,9 +148,10 @@ namespace: "{{ ansible_operator_meta.namespace }}" name: "{{ ansible_operator_meta.name }}-postgres-{{ supported_pg_version }}" register: _pg_sts_status + until: _pg_sts_status['resources'][0]['status']['availableReplicas'] == _pg_sts_status['resources'][0]['status']['replicas'] + retries: 20 + delay: 10 -# This status will probably not reflect the real state during the first playbook execution -# I thought to put a wait_for here, but it would just delay the execution of the other tasks - operator_sdk.util.k8s_status: api_version: "{{ api_version }}" kind: "{{ kind }}" @@ -162,4 +163,3 @@ reason: DatabaseTasksFinished status: "True" lastTransitionTime: "{{ lookup('pipe', 'date --iso-8601=seconds') }}" -# when: _pg_sts_status['resources'][0]['status']['availableReplicas'] == _pg_sts_status['resources'][0]['status']['replicas'] From 6215f549fa3e6554e42081b3463597b2f949dd27 Mon Sep 17 00:00:00 2001 From: "Christian M. 
Adams" Date: Mon, 29 Apr 2024 14:41:51 -0400 Subject: [PATCH 24/27] Add checksum for secrets and configmaps to deployments so containers cycle to pick up changes --- .../tasks/db_fields_encryption_configuration.yml | 8 ++++++++ roles/common/tasks/galaxy_server_secret.yml | 2 +- .../templates/galaxy-api.deployment.yaml.j2 | 8 ++++++-- .../templates/galaxy-content.deployment.yaml.j2 | 13 +++++++++++++ .../templates/galaxy-worker.deployment.yaml.j2 | 8 ++++++-- roles/postgres/tasks/upgrade_postgres.yml | 6 ++++++ 6 files changed, 40 insertions(+), 5 deletions(-) diff --git a/roles/common/tasks/db_fields_encryption_configuration.yml b/roles/common/tasks/db_fields_encryption_configuration.yml index 1e47a706..afb61054 100644 --- a/roles/common/tasks/db_fields_encryption_configuration.yml +++ b/roles/common/tasks/db_fields_encryption_configuration.yml @@ -70,3 +70,11 @@ no_log: "{{ no_log }}" when: not _db_fields_encryption_secret['resources'] | default([]) | length + +- name: Retrieve db_fields_encryption_key Secret + k8s_info: + kind: Secret + namespace: '{{ ansible_operator_meta.namespace }}' + name: '{{ db_fields_encryption_secret }}' + register: db_fields_encryption_secret_contents + no_log: "{{ no_log }}" diff --git a/roles/common/tasks/galaxy_server_secret.yml b/roles/common/tasks/galaxy_server_secret.yml index 656096e2..789e667f 100644 --- a/roles/common/tasks/galaxy_server_secret.yml +++ b/roles/common/tasks/galaxy_server_secret.yml @@ -84,7 +84,7 @@ name: "{{ ansible_operator_meta.name }}" namespace: "{{ ansible_operator_meta.namespace }}" conditions: - - type: "{{ deployment_type|capitalize }}-Config-Ready" + - type: "{{ deployment_type|capitalize }}-API-Ready" message: "Creating {{ ansible_operator_meta.name }}-server Secret resource" reason: CreatingServerSecret status: "False" diff --git a/roles/galaxy-api/templates/galaxy-api.deployment.yaml.j2 b/roles/galaxy-api/templates/galaxy-api.deployment.yaml.j2 index 215e0139..cdda05d6 100644 --- 
a/roles/galaxy-api/templates/galaxy-api.deployment.yaml.j2 +++ b/roles/galaxy-api/templates/galaxy-api.deployment.yaml.j2 @@ -40,10 +40,14 @@ spec: app.kubernetes.io/component: api app.kubernetes.io/part-of: '{{ deployment_type }}' app.kubernetes.io/managed-by: '{{ deployment_type }}-operator' -{% if signing_secret is defined %} annotations: kubectl.kubernetes.io/default-container: api -{% endif %} +{% for secret in [ + "galaxy_server_secret", + "db_fields_encryption_secret_contents", + ] %} + checksum-secret-{{ secret }}: "{{ lookup('ansible.builtin.vars', secret, default='')["resources"][0]["data"] | default('') | sha1 }}" +{% endfor %} spec: {% if _node_affinity is defined %} affinity: diff --git a/roles/galaxy-content/templates/galaxy-content.deployment.yaml.j2 b/roles/galaxy-content/templates/galaxy-content.deployment.yaml.j2 index c2be986c..450b08ba 100644 --- a/roles/galaxy-content/templates/galaxy-content.deployment.yaml.j2 +++ b/roles/galaxy-content/templates/galaxy-content.deployment.yaml.j2 @@ -41,6 +41,19 @@ spec: app.kubernetes.io/component: content-server app.kubernetes.io/part-of: '{{ deployment_type }}' app.kubernetes.io/managed-by: '{{ deployment_type }}-operator' + annotations: + kubectl.kubernetes.io/default-container: content +{% for template in [ + "galaxy-file-storage.pvc", + ] %} + checksum-{{ template | replace('/', '-') }}: "{{ lookup('template', template + '.yaml.j2') | sha1 }}" +{% endfor %} +{% for secret in [ + "galaxy_server_secret", + "db_fields_encryption_secret_contents", + ] %} + checksum-secret-{{ secret }}: "{{ lookup('ansible.builtin.vars', secret, default='')["resources"][0]["data"] | default('') | sha1 }}" +{% endfor %} spec: {% if _node_affinity is defined %} affinity: diff --git a/roles/galaxy-worker/templates/galaxy-worker.deployment.yaml.j2 b/roles/galaxy-worker/templates/galaxy-worker.deployment.yaml.j2 index 31d52d3e..54a5b47b 100644 --- a/roles/galaxy-worker/templates/galaxy-worker.deployment.yaml.j2 +++ 
b/roles/galaxy-worker/templates/galaxy-worker.deployment.yaml.j2 @@ -41,10 +41,14 @@ spec: app.kubernetes.io/component: worker app.kubernetes.io/part-of: '{{ deployment_type }}' app.kubernetes.io/managed-by: '{{ deployment_type }}-operator' -{% if signing_secret is defined %} annotations: kubectl.kubernetes.io/default-container: worker -{% endif %} +{% for secret in [ + "galaxy_server_secret", + "db_fields_encryption_secret_contents", + ] %} + checksum-secret-{{ secret }}: "{{ lookup('ansible.builtin.vars', secret, default='')["resources"][0]["data"] | default('') | sha1 }}" +{% endfor %} spec: {% if _node_affinity is defined %} affinity: diff --git a/roles/postgres/tasks/upgrade_postgres.yml b/roles/postgres/tasks/upgrade_postgres.yml index 805d7d9b..b81832f0 100644 --- a/roles/postgres/tasks/upgrade_postgres.yml +++ b/roles/postgres/tasks/upgrade_postgres.yml @@ -314,3 +314,9 @@ status: "True" lastTransitionTime: "{{ lookup('pipe', 'date --iso-8601=seconds') }}" +- name: Update galaxy-server secret with new postgres host + k8s: + state: "{{ deployment_state }}" + definition: "{{ lookup('template', '../common/templates/galaxy-server.secret.yaml.j2') | from_yaml }}" + register: galaxy_server_secret + no_log: "{{ no_log }}" From bbbc8b0adde2ea5423694f2a9a7201c1fd543bff Mon Sep 17 00:00:00 2001 From: "Christian M. Adams" Date: Mon, 29 Apr 2024 17:44:56 -0400 Subject: [PATCH 25/27] Refactor: Split Galaxy Server secret into a dedicated role - without this change, nodeport deployments failed because the web service was not created by the time get_node_ip.yml was run.
- Re-enable all PR checks --- .github/workflows/pr.yml | 12 ++++---- playbooks/galaxy.yaml | 1 + roles/common/tasks/main.yml | 2 -- roles/galaxy-config/defaults/main.yml | 1 + roles/galaxy-config/meta/main.yml | 17 +++++++++++ .../tasks/combine_galaxy_settings.yml} | 30 ------------------- .../tasks/galaxy_server_secret.yml | 20 +++++++++++++ .../tasks/get_node_ip.yml | 0 roles/galaxy-config/tasks/main.yml | 18 +++++++++++ .../tasks/sso-configuration.yml | 0 .../templates/galaxy-server.secret.yaml.j2 | 0 roles/postgres/tasks/upgrade_postgres.yml | 2 +- 12 files changed, 64 insertions(+), 39 deletions(-) create mode 100644 roles/galaxy-config/defaults/main.yml create mode 100644 roles/galaxy-config/meta/main.yml rename roles/{common/tasks/galaxy_server_secret.yml => galaxy-config/tasks/combine_galaxy_settings.yml} (73%) create mode 100644 roles/galaxy-config/tasks/galaxy_server_secret.yml rename roles/{common => galaxy-config}/tasks/get_node_ip.yml (100%) create mode 100644 roles/galaxy-config/tasks/main.yml rename roles/{common => galaxy-config}/tasks/sso-configuration.yml (100%) rename roles/{common => galaxy-config}/templates/galaxy-server.secret.yaml.j2 (100%) diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 90e46ce2..3c05c227 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -14,12 +14,12 @@ jobs: include: - STORAGE: filesystem IMAGE: minimal - # - STORAGE: filesystem - # IMAGE: s6 - # - STORAGE: azure - # IMAGE: minimal - # - STORAGE: s3 - # IMAGE: minimal + - STORAGE: filesystem + IMAGE: s6 + - STORAGE: azure + IMAGE: minimal + - STORAGE: s3 + IMAGE: minimal steps: - name: PR head repo id: head_repo_name diff --git a/playbooks/galaxy.yaml b/playbooks/galaxy.yaml index 50bb1cb4..f59d196a 100644 --- a/playbooks/galaxy.yaml +++ b/playbooks/galaxy.yaml @@ -65,6 +65,7 @@ - common - postgres - galaxy-web + - galaxy-config tasks: diff --git a/roles/common/tasks/main.yml b/roles/common/tasks/main.yml index 
f38c69ab..376f75b1 100644 --- a/roles/common/tasks/main.yml +++ b/roles/common/tasks/main.yml @@ -83,5 +83,3 @@ - name: Configure Object Storage include_tasks: object_storage_configuration.yml -- name: Configure Galaxy Server Secret - include_tasks: galaxy_server_secret.yml diff --git a/roles/galaxy-config/defaults/main.yml b/roles/galaxy-config/defaults/main.yml new file mode 100644 index 00000000..ed97d539 --- /dev/null +++ b/roles/galaxy-config/defaults/main.yml @@ -0,0 +1 @@ +--- diff --git a/roles/galaxy-config/meta/main.yml b/roles/galaxy-config/meta/main.yml new file mode 100644 index 00000000..a1f3d8d2 --- /dev/null +++ b/roles/galaxy-config/meta/main.yml @@ -0,0 +1,17 @@ +--- +galaxy_info: + author: Galaxy-Operator Team + description: A role to setup Galaxy configuration + issue_tracker_url: https://github.com/ansible/galaxy-operator/issues/new + license: GPL-2.0-or-later + company: Red Hat + galaxy_tags: + - galaxy + - pulp + - pulpcore +dependencies: [] + # List your role dependencies here, one per line. Be sure to remove the '[]' above, + # if you add dependencies to this list. 
+collections: +- operator_sdk.util +- kubernetes.core diff --git a/roles/common/tasks/galaxy_server_secret.yml b/roles/galaxy-config/tasks/combine_galaxy_settings.yml similarity index 73% rename from roles/common/tasks/galaxy_server_secret.yml rename to roles/galaxy-config/tasks/combine_galaxy_settings.yml index 789e667f..917ab761 100644 --- a/roles/common/tasks/galaxy_server_secret.yml +++ b/roles/galaxy-config/tasks/combine_galaxy_settings.yml @@ -66,33 +66,3 @@ pulp_combined_settings: "{{ default_settings|combine(raw_pulp_settings, recursive=True) if pulp_settings is defined and pulp_settings is not none else default_settings }}" cacheable: yes no_log: "{{ no_log }}" - -- name: Include redis role - include_role: - name: redis - when: pulp_combined_settings.cache_enabled - -- include_tasks: - file: sso-configuration.yml - when: - - sso_secret is defined - - sso_secret | length - -- k8s_status: - api_version: "{{ api_version }}" - kind: "{{ kind }}" - name: "{{ ansible_operator_meta.name }}" - namespace: "{{ ansible_operator_meta.namespace }}" - conditions: - - type: "{{ deployment_type|capitalize }}-API-Ready" - message: "Creating {{ ansible_operator_meta.name }}-server Secret resource" - reason: CreatingServerSecret - status: "False" - lastTransitionTime: "{{ lookup('pipe', 'date --iso-8601=seconds') }}" - -- name: galaxy-server secret - k8s: - state: "{{ deployment_state }}" - definition: "{{ lookup('template', 'templates/galaxy-server.secret.yaml.j2') | from_yaml }}" - register: galaxy_server_secret - no_log: "{{ no_log }}" diff --git a/roles/galaxy-config/tasks/galaxy_server_secret.yml b/roles/galaxy-config/tasks/galaxy_server_secret.yml new file mode 100644 index 00000000..104ca7a0 --- /dev/null +++ b/roles/galaxy-config/tasks/galaxy_server_secret.yml @@ -0,0 +1,20 @@ +--- + +- k8s_status: + api_version: "{{ api_version }}" + kind: "{{ kind }}" + name: "{{ ansible_operator_meta.name }}" + namespace: "{{ ansible_operator_meta.namespace }}" + conditions: + - 
type: "{{ deployment_type|capitalize }}-API-Ready" + message: "Creating {{ ansible_operator_meta.name }}-server Secret resource" + reason: CreatingServerSecret + status: "False" + lastTransitionTime: "{{ lookup('pipe', 'date --iso-8601=seconds') }}" + +- name: galaxy-server secret + k8s: + state: "{{ deployment_state }}" + definition: "{{ lookup('template', 'templates/galaxy-server.secret.yaml.j2') | from_yaml }}" + register: galaxy_server_secret + no_log: "{{ no_log }}" diff --git a/roles/common/tasks/get_node_ip.yml b/roles/galaxy-config/tasks/get_node_ip.yml similarity index 100% rename from roles/common/tasks/get_node_ip.yml rename to roles/galaxy-config/tasks/get_node_ip.yml diff --git a/roles/galaxy-config/tasks/main.yml b/roles/galaxy-config/tasks/main.yml new file mode 100644 index 00000000..08497c1d --- /dev/null +++ b/roles/galaxy-config/tasks/main.yml @@ -0,0 +1,18 @@ +--- + +- name: Configure Galaxy Server Secret + include_tasks: combine_galaxy_settings.yml + +- name: Include redis role + include_role: + name: redis + when: pulp_combined_settings.cache_enabled + +- include_tasks: + file: sso-configuration.yml + when: + - sso_secret is defined + - sso_secret | length + +- name: Configure Galaxy Server Secret + include_tasks: galaxy_server_secret.yml diff --git a/roles/common/tasks/sso-configuration.yml b/roles/galaxy-config/tasks/sso-configuration.yml similarity index 100% rename from roles/common/tasks/sso-configuration.yml rename to roles/galaxy-config/tasks/sso-configuration.yml diff --git a/roles/common/templates/galaxy-server.secret.yaml.j2 b/roles/galaxy-config/templates/galaxy-server.secret.yaml.j2 similarity index 100% rename from roles/common/templates/galaxy-server.secret.yaml.j2 rename to roles/galaxy-config/templates/galaxy-server.secret.yaml.j2 diff --git a/roles/postgres/tasks/upgrade_postgres.yml b/roles/postgres/tasks/upgrade_postgres.yml index b81832f0..5f9e8d82 100644 --- a/roles/postgres/tasks/upgrade_postgres.yml +++ 
b/roles/postgres/tasks/upgrade_postgres.yml @@ -317,6 +317,6 @@ - name: Update galaxy-server secret with new postgres host k8s: state: "{{ deployment_state }}" - definition: "{{ lookup('template', '../common/templates/galaxy-server.secret.yaml.j2') | from_yaml }}" + definition: "{{ lookup('template', '../galaxy-config/templates/galaxy-server.secret.yaml.j2') | from_yaml }}" register: galaxy_server_secret no_log: "{{ no_log }}" From d72fe0aad832c02ce7f89d079147e7006e4061ce Mon Sep 17 00:00:00 2001 From: "Christian M. Adams" Date: Tue, 30 Apr 2024 13:20:31 -0400 Subject: [PATCH 26/27] During backup, use the deployed application image specified on the CR * Add ability to configure backup_resource_requirements and restore_resource_requirements * Deprecate backup_pvc_namespace parameter * Only add file-storage-pvc checksum if file_storage is enabled --- .../galaxy_v1beta1_galaxybackup_crd.yaml | 20 ++++++++++++++++++- .../galaxy_v1beta1_galaxyrestore_crd.yaml | 20 ++++++++++++++++++- ...galaxy-operator.clusterserviceversion.yaml | 14 +++++++++++-- config/rbac/role.yaml | 1 - roles/backup/README.md | 15 ++++++++++++++ roles/backup/defaults/main.yml | 11 +++++++++- roles/backup/tasks/init.yml | 4 ++++ .../templates/backup-content-k8s-job.yaml.j2 | 6 +++++- roles/backup/templates/management-pod.yaml.j2 | 4 ++++ .../galaxy-content.deployment.yaml.j2 | 2 ++ roles/restore/README.md | 14 +++++++++++++ roles/restore/defaults/main.yml | 11 +++++++++- .../restore/templates/management-pod.yaml.j2 | 4 ++++ .../templates/restore-content-k8s-job.yaml.j2 | 4 ++++ 14 files changed, 122 insertions(+), 8 deletions(-) diff --git a/config/crd/bases/galaxy_v1beta1_galaxybackup_crd.yaml b/config/crd/bases/galaxy_v1beta1_galaxybackup_crd.yaml index 6aea29b9..69071e95 100644 --- a/config/crd/bases/galaxy_v1beta1_galaxybackup_crd.yaml +++ b/config/crd/bases/galaxy_v1beta1_galaxybackup_crd.yaml @@ -33,11 +33,29 @@ spec: description: Name of the PVC to be used for storing the backup type: string 
backup_pvc_namespace: - description: Namespace PVC is in + description: Namespace PVC is in (Deprecated) type: string backup_storage_requirements: description: Storage requirements for the backup type: string + backup_resource_requirements: + description: Resource requirements for the management pod used to create a backup + properties: + requests: + properties: + cpu: + type: string + memory: + type: string + type: object + limits: + properties: + cpu: + type: string + memory: + type: string + type: object + type: object backup_storage_class: description: Storage class to use when creating PVC for backup type: string diff --git a/config/crd/bases/galaxy_v1beta1_galaxyrestore_crd.yaml b/config/crd/bases/galaxy_v1beta1_galaxyrestore_crd.yaml index 80bb151f..309ad121 100644 --- a/config/crd/bases/galaxy_v1beta1_galaxyrestore_crd.yaml +++ b/config/crd/bases/galaxy_v1beta1_galaxyrestore_crd.yaml @@ -42,7 +42,7 @@ spec: description: Name of the PVC to be restored from, set as a status found on the backup object (backupClaim) type: string backup_pvc_namespace: - description: Namespace the PVC is in + description: Namespace the PVC is in (Deprecated) type: string backup_dir: description: Backup directory name, set as a status found on the backup object (backupDirectory) @@ -57,6 +57,24 @@ spec: - azure - Azure type: string + restore_resource_requirements: + description: Resource requirements for the management pod used to do a restore + properties: + requests: + properties: + cpu: + type: string + memory: + type: string + type: object + limits: + properties: + cpu: + type: string + memory: + type: string + type: object + type: object postgres_label_selector: description: Label selector used to identify postgres pod for executing migration type: string diff --git a/config/manifests/bases/galaxy-operator.clusterserviceversion.yaml b/config/manifests/bases/galaxy-operator.clusterserviceversion.yaml index 987eea59..e9340803 100644 --- 
a/config/manifests/bases/galaxy-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/galaxy-operator.clusterserviceversion.yaml @@ -615,17 +615,22 @@ spec: x-descriptors: - urn:alm:descriptor:io.kubernetes:PersistentVolumeClaim - urn:alm:descriptor:com.tectonic.ui:advanced - - displayName: Backup persistent volume claim namespace + - displayName: Backup persistent volume claim namespace (Deprecated) path: backup_pvc_namespace x-descriptors: - urn:alm:descriptor:com.tectonic.ui:text - - urn:alm:descriptor:com.tectonic.ui:advanced + - urn:alm:descriptor:com.tectonic.ui:hidden - urn:alm:descriptor:io.kubernetes:Namespace - displayName: Backup PVC storage requirements path: backup_storage_requirements x-descriptors: - urn:alm:descriptor:com.tectonic.ui:text - urn:alm:descriptor:com.tectonic.ui:advanced + - displayName: Backup Management Pod Resource Requirements + path: backup_resource_requirements + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:advanced + - urn:alm:descriptor:com.tectonic.ui:resourceRequirements - displayName: Backup PVC storage class path: backup_storage_class x-descriptors: @@ -737,6 +742,11 @@ spec: - urn:alm:descriptor:com.tectonic.ui:select:S3 - urn:alm:descriptor:com.tectonic.ui:select:Azure - urn:alm:descriptor:com.tectonic.ui:fieldDependency:backup_source:PVC + - displayName: Restore Management Pod Resource Requirements + path: restore_resource_requirements + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:advanced + - urn:alm:descriptor:com.tectonic.ui:resourceRequirements - displayName: Database restore label selector path: postgres_label_selector x-descriptors: diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 7bcf74b4..cc302319 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -94,7 +94,6 @@ rules: - apiGroups: - batch resources: - - cronjobs - jobs verbs: - get diff --git a/roles/backup/README.md b/roles/backup/README.md index b1bd63d0..c5742d11 100644 --- a/roles/backup/README.md 
+++ b/roles/backup/README.md @@ -11,9 +11,24 @@ Role Variables * `deployment_name`: The name of the galaxy custom resource to backup * `backup_pvc`: The name of the PVC to uses for backup * `backup_storage_requirements`: The size of storage for the PVC created by operator if one is not supplied +* `backup_resource_requirements`: The resource requirements (CPU and memory requests and limits) for the management pod used to create a backup * `backup_storage_class`: The storage class to be used for the backup PVC * `postgres_configuration_secret`: The postgres_configuration_secret + +Defining resource limits and requests for the backup CR + +``` +backup_resource_requirements: + limits: + cpu: "1000m" + memory: "4096Mi" + requests: + cpu: "25m" + memory: "32Mi" +``` + + Requirements ------------ diff --git a/roles/backup/defaults/main.yml b/roles/backup/defaults/main.yml index aa2e51ca..2d902c27 100644 --- a/roles/backup/defaults/main.yml +++ b/roles/backup/defaults/main.yml @@ -4,7 +4,7 @@ deployment_name: '' # Specify a pre-created PVC (name) to backup to backup_pvc: '' -backup_pvc_namespace: "{{ ansible_operator_meta.namespace }}" +backup_pvc_namespace: "{{ ansible_operator_meta.namespace }}" # deprecated # Size of backup PVC if created dynamically backup_storage_requirements: '' @@ -21,3 +21,12 @@ custom_resource_key: '_galaxy_ansible_com_galaxybackup' database_type: 'unmanaged' azure_container_path: '' + +# Default resource requirements +backup_resource_requirements: + limits: + cpu: "1000m" + memory: "4096Mi" + requests: + cpu: "25m" + memory: "32Mi" diff --git a/roles/backup/tasks/init.yml b/roles/backup/tasks/init.yml index a8393317..8109bd5b 100644 --- a/roles/backup/tasks/init.yml +++ b/roles/backup/tasks/init.yml @@ -76,6 +76,10 @@ set_fact: _galaxy: "{{ _custom_resource['resources'][0] }}" +- name: Set Galaxy Image used + set_fact: + _galaxy_image: "{{ _galaxy['status']['deployedImage'] }}" + - name: Set apiVersion set_fact: api_version: "{{ _galaxy['apiVersion'] }}" diff --git
a/roles/backup/templates/backup-content-k8s-job.yaml.j2 b/roles/backup/templates/backup-content-k8s-job.yaml.j2 index b8e7fbf8..d4c674b9 100644 --- a/roles/backup/templates/backup-content-k8s-job.yaml.j2 +++ b/roles/backup/templates/backup-content-k8s-job.yaml.j2 @@ -22,7 +22,7 @@ spec: {% endif %} containers: - name: backup-content - image: quay.io/ansible/galaxy-ng:latest # TODO: Add set image tasks, and var ref here, etc. + image: {{ _galaxy_image }} imagePullPolicy: Always command: - /bin/bash @@ -30,6 +30,10 @@ spec: - | mkdir -p {{ _backup_dir }}/pulp/ cp -fr /var/lib/pulp/. {{ _backup_dir }}/pulp +{% if backup_resource_requirements is defined %} + resources: + {{ backup_resource_requirements | to_nice_yaml(indent=2) | indent(width=10, first=False) }} +{%- endif %} volumeMounts: - name: {{ ansible_operator_meta.name }}-backup mountPath: /backups diff --git a/roles/backup/templates/management-pod.yaml.j2 b/roles/backup/templates/management-pod.yaml.j2 index a8498420..7ecca94b 100644 --- a/roles/backup/templates/management-pod.yaml.j2 +++ b/roles/backup/templates/management-pod.yaml.j2 @@ -25,6 +25,10 @@ spec: - | mkdir -p {{ _backup_dir }}/ sleep infinity +{% if backup_resource_requirements is defined %} + resources: + {{ backup_resource_requirements | to_nice_yaml(indent=2) | indent(width=6, first=False) }} +{%- endif %} volumeMounts: - name: {{ ansible_operator_meta.name }}-backup mountPath: /backups diff --git a/roles/galaxy-content/templates/galaxy-content.deployment.yaml.j2 b/roles/galaxy-content/templates/galaxy-content.deployment.yaml.j2 index 450b08ba..afc8fd9a 100644 --- a/roles/galaxy-content/templates/galaxy-content.deployment.yaml.j2 +++ b/roles/galaxy-content/templates/galaxy-content.deployment.yaml.j2 @@ -43,11 +43,13 @@ spec: app.kubernetes.io/managed-by: '{{ deployment_type }}-operator' annotations: kubectl.kubernetes.io/default-container: content +{% if is_file_storage %} {% for template in [ "galaxy-file-storage.pvc", ] %} checksum-{{ 
template | replace('/', '-') }}: "{{ lookup('template', template + '.yaml.j2') | sha1 }}" {% endfor %} +{% endif %} {% for secret in [ "galaxy_server_secret", "db_fields_encryption_secret_contents", diff --git a/roles/restore/README.md b/roles/restore/README.md index 4d8b4847..347195e5 100644 --- a/roles/restore/README.md +++ b/roles/restore/README.md @@ -10,6 +10,20 @@ Role Variables * `backup_name`: The name of the galaxy backup custom resource to restore from * `postgres_label_selector`: The label selector for an external container based database +* `restore_resource_requirements`: The resource requirements (CPU and memory requests and limits) for the management pod used to do a restore + + +Defining resource limits and requests for the restore CR + +``` +restore_resource_requirements: + limits: + cpu: "1000m" + memory: "4096Mi" + requests: + cpu: "25m" + memory: "32Mi" +``` Requirements ------------ diff --git a/roles/restore/defaults/main.yml b/roles/restore/defaults/main.yml index 57a91fdb..ccad5256 100644 --- a/roles/restore/defaults/main.yml +++ b/roles/restore/defaults/main.yml @@ -4,7 +4,7 @@ backup_name: '' # Required: specify a pre-created PVC (name) to restore from backup_pvc: '' -backup_pvc_namespace: "{{ ansible_operator_meta.namespace }}" +backup_pvc_namespace: "{{ ansible_operator_meta.namespace }}" # deprecated # Required: backup name, found on the backup object backup_dir: '' @@ -19,3 +19,12 @@ sso_secret: '' # Default cluster name cluster_name: '' # On most clusters, this is 'cluster.local' + +# Default resource requirements +restore_resource_requirements: + limits: + cpu: "1000m" + memory: "4096Mi" + requests: + cpu: "25m" + memory: "32Mi" diff --git a/roles/restore/templates/management-pod.yaml.j2 b/roles/restore/templates/management-pod.yaml.j2 index 310c3dce..af515bff 100644 --- a/roles/restore/templates/management-pod.yaml.j2 +++ b/roles/restore/templates/management-pod.yaml.j2 @@ -24,6 +24,10 @@ spec: - -c - | sleep infinity +{% if restore_resource_requirements is defined %} + resources: + {{
restore_resource_requirements | to_nice_yaml(indent=2) | indent(width=6, first=False) }} +{%- endif %} volumeMounts: - name: {{ ansible_operator_meta.name }}-backup mountPath: /backups diff --git a/roles/restore/templates/restore-content-k8s-job.yaml.j2 b/roles/restore/templates/restore-content-k8s-job.yaml.j2 index 77a9b94f..a718e1b9 100644 --- a/roles/restore/templates/restore-content-k8s-job.yaml.j2 +++ b/roles/restore/templates/restore-content-k8s-job.yaml.j2 @@ -30,6 +30,10 @@ spec: - | stat {{ backup_dir }}/pulp/ cp -fr {{ backup_dir }}/pulp/. /var/lib/pulp +{% if restore_resource_requirements is defined %} + resources: + {{ restore_resource_requirements | to_nice_yaml(indent=2) | indent(width=10, first=False) }} +{%- endif %} volumeMounts: - name: {{ ansible_operator_meta.name }}-backup mountPath: /backups From fe46e196c52a7d322211432461ad133bc86d663e Mon Sep 17 00:00:00 2001 From: "Christian M. Adams" Date: Tue, 30 Apr 2024 13:29:07 -0400 Subject: [PATCH 27/27] Always specify api_version with k8s tasks using the Pod resource --- .github/workflows/pr.yml | 5 +---- roles/backup/tasks/cleanup.yml | 1 + roles/backup/tasks/init.yml | 2 ++ roles/backup/tasks/postgres.yml | 1 + roles/backup/tasks/remove_management_pod.yml | 1 + roles/galaxy-config/tasks/get_node_ip.yml | 1 + roles/galaxy-status/tasks/main.yml | 1 + roles/postgres/tasks/main.yml | 1 + roles/postgres/tasks/migrate_data.yml | 1 + roles/postgres/tasks/upgrade_postgres.yml | 1 + roles/restore/tasks/init.yml | 2 ++ roles/restore/tasks/postgres.yml | 1 + roles/restore/tasks/remove_management_pod.yml | 1 + 13 files changed, 15 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 3c05c227..1db48735 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -122,10 +122,6 @@ jobs: run: sudo -E .ci/scripts/galaxy_ng-tests.sh -m shell: bash - # # TODO: Remove after testing - # - name: Setup tmate session - # uses: mxschmitt/action-tmate@v3 - - name: 
Backup & Restore run: CI_TEST=galaxy .ci/scripts/backup_and_restore.sh -m shell: bash @@ -135,6 +131,7 @@ jobs: - name: Logs if: always() run: .github/workflows/scripts/show_logs.sh + # - name: Debugging example (uncomment when needed) # if: failure() # uses: mxschmitt/action-tmate@v3 diff --git a/roles/backup/tasks/cleanup.yml b/roles/backup/tasks/cleanup.yml index 1f355170..65a56e82 100644 --- a/roles/backup/tasks/cleanup.yml +++ b/roles/backup/tasks/cleanup.yml @@ -10,6 +10,7 @@ k8s: name: "{{ ansible_operator_meta.name }}-backup-manager" kind: Pod + api_version: v1 namespace: "{{ backup_pvc_namespace }}" state: absent force: true diff --git a/roles/backup/tasks/init.yml b/roles/backup/tasks/init.yml index 8109bd5b..8411b075 100644 --- a/roles/backup/tasks/init.yml +++ b/roles/backup/tasks/init.yml @@ -4,6 +4,7 @@ k8s: name: "{{ ansible_operator_meta.name }}-backup-manager" kind: Pod + api_version: v1 namespace: "{{ backup_pvc_namespace }}" state: absent force: true @@ -199,6 +200,7 @@ - name: Wait for backup management pod to be running k8s_info: kind: Pod + api_version: v1 namespace: "{{ ansible_operator_meta.namespace }}" label_selectors: - "app.kubernetes.io/component=backup-manager" diff --git a/roles/backup/tasks/postgres.yml b/roles/backup/tasks/postgres.yml index 8d230ff1..67980ade 100644 --- a/roles/backup/tasks/postgres.yml +++ b/roles/backup/tasks/postgres.yml @@ -41,6 +41,7 @@ - name: Get the postgres pod information k8s_info: kind: Pod + api_version: v1 namespace: '{{ ansible_operator_meta.namespace }}' label_selectors: - "{{ postgres_label_selector }}" diff --git a/roles/backup/tasks/remove_management_pod.yml b/roles/backup/tasks/remove_management_pod.yml index 31129677..40682ec7 100644 --- a/roles/backup/tasks/remove_management_pod.yml +++ b/roles/backup/tasks/remove_management_pod.yml @@ -3,6 +3,7 @@ k8s: name: "{{ ansible_operator_meta.name }}-backup-manager" kind: Pod + api_version: v1 namespace: "{{ backup_pvc_namespace }}" state: absent 
force: true diff --git a/roles/galaxy-config/tasks/get_node_ip.yml b/roles/galaxy-config/tasks/get_node_ip.yml index a14c8d55..881c10bd 100644 --- a/roles/galaxy-config/tasks/get_node_ip.yml +++ b/roles/galaxy-config/tasks/get_node_ip.yml @@ -79,6 +79,7 @@ - name: Retrieve web pod k8s_info: kind: Pod + api_version: v1 namespace: "{{ ansible_operator_meta.namespace }}" label_selectors: - "app.kubernetes.io/instance = nginx-{{ ansible_operator_meta.name }}" diff --git a/roles/galaxy-status/tasks/main.yml b/roles/galaxy-status/tasks/main.yml index c66d9822..e57ebd82 100644 --- a/roles/galaxy-status/tasks/main.yml +++ b/roles/galaxy-status/tasks/main.yml @@ -32,6 +32,7 @@ - name: Get the resource pod information. k8s_info: kind: Pod + api_version: v1 namespace: '{{ ansible_operator_meta.namespace }}' label_selectors: - "app.kubernetes.io/instance=nginx-{{ ansible_operator_meta.name }}" diff --git a/roles/postgres/tasks/main.yml b/roles/postgres/tasks/main.yml index 77bb9b91..1bed3538 100644 --- a/roles/postgres/tasks/main.yml +++ b/roles/postgres/tasks/main.yml @@ -36,6 +36,7 @@ - name: Get the old postgres pod (N-1) kubernetes.core.k8s_info: kind: Pod + api_version: v1 namespace: "{{ ansible_operator_meta.namespace }}" label_selectors: - "{{ postgres_label_selector }}" diff --git a/roles/postgres/tasks/migrate_data.yml b/roles/postgres/tasks/migrate_data.yml index cea4fff1..032f3ab9 100644 --- a/roles/postgres/tasks/migrate_data.yml +++ b/roles/postgres/tasks/migrate_data.yml @@ -18,6 +18,7 @@ - name: Get the postgres pod information k8s_info: kind: Pod + api_version: v1 namespace: '{{ ansible_operator_meta.namespace }}' label_selectors: - "{{ postgres_label_selector }}" diff --git a/roles/postgres/tasks/upgrade_postgres.yml b/roles/postgres/tasks/upgrade_postgres.yml index 5f9e8d82..ec601ac3 100644 --- a/roles/postgres/tasks/upgrade_postgres.yml +++ b/roles/postgres/tasks/upgrade_postgres.yml @@ -80,6 +80,7 @@ - name: Get new postgres pod information k8s_info: kind: 
Pod + api_version: v1 namespace: "{{ ansible_operator_meta.namespace }}" label_selectors: - "{{ postgres_label_selector }}" diff --git a/roles/restore/tasks/init.yml b/roles/restore/tasks/init.yml index 18db56d2..d6608003 100644 --- a/roles/restore/tasks/init.yml +++ b/roles/restore/tasks/init.yml @@ -146,6 +146,7 @@ k8s: name: "{{ ansible_operator_meta.name }}-backup-manager" kind: Pod + api_version: v1 namespace: "{{ backup_pvc_namespace }}" state: absent force: true @@ -170,6 +171,7 @@ - name: Wait for restore management pod to be running k8s_info: kind: Pod + api_version: v1 namespace: "{{ ansible_operator_meta.namespace }}" label_selectors: - "app.kubernetes.io/component=backup-manager" diff --git a/roles/restore/tasks/postgres.yml b/roles/restore/tasks/postgres.yml index 2b11f7af..98518e57 100644 --- a/roles/restore/tasks/postgres.yml +++ b/roles/restore/tasks/postgres.yml @@ -43,6 +43,7 @@ - name: Get the postgres pod information k8s_info: kind: Pod + api_version: v1 namespace: '{{ ansible_operator_meta.namespace }}' label_selectors: - "{{ postgres_label_selector }}" diff --git a/roles/restore/tasks/remove_management_pod.yml b/roles/restore/tasks/remove_management_pod.yml index 31129677..40682ec7 100644 --- a/roles/restore/tasks/remove_management_pod.yml +++ b/roles/restore/tasks/remove_management_pod.yml @@ -3,6 +3,7 @@ k8s: name: "{{ ansible_operator_meta.name }}-backup-manager" kind: Pod + api_version: v1 namespace: "{{ backup_pvc_namespace }}" state: absent force: true