From 5f76b55a39c00e15773f4e56a2b59474b7c56930 Mon Sep 17 00:00:00 2001 From: Sylvain Bauza Date: Wed, 9 Oct 2024 20:51:49 +0200 Subject: [PATCH] [Draft] Split nvidia-mdev arch in two stages --- automation/vars/nvidia-mdev.yaml | 31 +++- .../nvidia-mdev/edpm-post-driver/.gitignore | 2 + .../edpm-post-driver/deployment/.gitignore | 1 + .../deployment/kustomization.yaml | 12 ++ .../edpm-post-driver/deployment/values.yaml | 10 ++ .../edpm-post-driver/nodeset/.gitignore | 1 + .../nodeset/kustomization.yaml | 12 ++ .../edpm-post-driver/nodeset/values.yaml | 148 ++++++++++++++++++ .../va/nvidia-mdev/edpm/nodeset/values.yaml | 34 ---- .../deployment/kustomization.yaml | 21 +++ .../nodeset/baremetalset-password-secret.yaml | 9 ++ .../nodeset/kustomization.yaml | 66 ++++++++ .../nodeset/nova_sriov.yaml | 0 .../edpm/nodeset/kustomization.yaml | 28 ---- 14 files changed, 311 insertions(+), 64 deletions(-) create mode 100644 examples/va/nvidia-mdev/edpm-post-driver/.gitignore create mode 100644 examples/va/nvidia-mdev/edpm-post-driver/deployment/.gitignore create mode 100644 examples/va/nvidia-mdev/edpm-post-driver/deployment/kustomization.yaml create mode 100644 examples/va/nvidia-mdev/edpm-post-driver/deployment/values.yaml create mode 100644 examples/va/nvidia-mdev/edpm-post-driver/nodeset/.gitignore create mode 100644 examples/va/nvidia-mdev/edpm-post-driver/nodeset/kustomization.yaml create mode 100644 examples/va/nvidia-mdev/edpm-post-driver/nodeset/values.yaml create mode 100644 va/nvidia-mdev/edpm-post-driver/deployment/kustomization.yaml create mode 100644 va/nvidia-mdev/edpm-post-driver/nodeset/baremetalset-password-secret.yaml create mode 100644 va/nvidia-mdev/edpm-post-driver/nodeset/kustomization.yaml rename va/nvidia-mdev/{edpm => edpm-post-driver}/nodeset/nova_sriov.yaml (100%) diff --git a/automation/vars/nvidia-mdev.yaml b/automation/vars/nvidia-mdev.yaml index ef58b3a3c..8d84e16cf 100644 --- a/automation/vars/nvidia-mdev.yaml +++ b/automation/vars/nvidia-mdev.yaml @@ -37,10 +37,10 @@ vas: src_file: values.yaml build_output: nodeset.yaml post_stage_run: - - name: Install nvidia driver + - name: Run phase 1 playbook type: playbook # As a reminder, the job needs to set the nvidia driver URL - source: "../../playbooks/nvidia-mdev.yml" + source: "../../playbooks/nvidia-mdev-phase1.yml" inventory: "${HOME}/ci-framework-data/artifacts/zuul_inventory.yml" - path: examples/va/nvidia-mdev/edpm/deployment @@ -53,3 +53,30 @@ vas: - name: edpm-deployment-values src_file: values.yaml build_output: deployment.yaml + + - path: examples/va/nvidia-mdev/edpm-post-driver/nodeset + wait_conditions: + - >- + oc -n openstack wait + osdpns openstack-edpm --for condition=SetupReady + --timeout=60m + values: + - name: edpm-post-driver-nodeset-values + src_file: values.yaml + build_output: nodeset-post-driver.yaml + post_stage_run: + - name: Run phase 2 playbook + type: playbook + source: "../../playbooks/nvidia-mdev-phase2.yml" + inventory: "${HOME}/ci-framework-data/artifacts/zuul_inventory.yml" + + - path: examples/va/nvidia-mdev/edpm-post-driver/deployment + wait_conditions: + - >- + oc -n openstack wait + osdpns openstack-edpm --for condition=Ready + --timeout=60m + values: + - name: edpm-post-driver-deployment-values + src_file: values.yaml + build_output: deployment-post-driver.yaml diff --git a/examples/va/nvidia-mdev/edpm-post-driver/.gitignore b/examples/va/nvidia-mdev/edpm-post-driver/.gitignore new file mode 100644 index 000000000..835442d0a --- /dev/null +++ b/examples/va/nvidia-mdev/edpm-post-driver/.gitignore @@ -0,0 +1,2 @@ +dataplane-deployment.yaml +dataplane-nodeset.yaml \ No newline at end of file diff --git a/examples/va/nvidia-mdev/edpm-post-driver/deployment/.gitignore b/examples/va/nvidia-mdev/edpm-post-driver/deployment/.gitignore new file mode 100644 index 000000000..56387c5df --- /dev/null +++ b/examples/va/nvidia-mdev/edpm-post-driver/deployment/.gitignore @@ -0,0 +1 @@ +dataplane-deployment.yaml \ No newline at end of file diff --git a/examples/va/nvidia-mdev/edpm-post-driver/deployment/kustomization.yaml b/examples/va/nvidia-mdev/edpm-post-driver/deployment/kustomization.yaml new file mode 100644 index 000000000..b3513f001 --- /dev/null +++ b/examples/va/nvidia-mdev/edpm-post-driver/deployment/kustomization.yaml @@ -0,0 +1,12 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +components: + - ../../../../../va/nvidia-mdev/edpm-post-driver/deployment + # - https://github.com/openstack-k8s-operators/architecture/va/nvidia-mdev/edpm-post-driver/deployment?ref=main + ## It's possible to replace ../../../../../va/nvidia-mdev/edpm-post-driver/deployment/ with a git checkout URL as per: + ## https://github.com/kubernetes-sigs/kustomize/blob/master/examples/remoteBuild.md + +resources: + - values.yaml diff --git a/examples/va/nvidia-mdev/edpm-post-driver/deployment/values.yaml b/examples/va/nvidia-mdev/edpm-post-driver/deployment/values.yaml new file mode 100644 index 000000000..7cafaa14f --- /dev/null +++ b/examples/va/nvidia-mdev/edpm-post-driver/deployment/values.yaml @@ -0,0 +1,10 @@ +# yamllint disable rule:line-length +# local-config: referenced, but not emitted by kustomize +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: edpm-post-driver-deployment-values + annotations: + config.kubernetes.io/local-config: "true" +data: {} diff --git a/examples/va/nvidia-mdev/edpm-post-driver/nodeset/.gitignore b/examples/va/nvidia-mdev/edpm-post-driver/nodeset/.gitignore new file mode 100644 index 000000000..721008e8b --- /dev/null +++ b/examples/va/nvidia-mdev/edpm-post-driver/nodeset/.gitignore @@ -0,0 +1 @@ +dataplane-nodeset.yaml \ No newline at end of file diff --git a/examples/va/nvidia-mdev/edpm-post-driver/nodeset/kustomization.yaml b/examples/va/nvidia-mdev/edpm-post-driver/nodeset/kustomization.yaml new file mode 100644 index 000000000..ef7904889 --- /dev/null +++ b/examples/va/nvidia-mdev/edpm-post-driver/nodeset/kustomization.yaml @@ -0,0 +1,12 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +components: + - ../../../../../va/nvidia-mdev/edpm-post-driver/nodeset + # - https://github.com/openstack-k8s-operators/architecture/va/nvidia-mdev/edpm-post-driver/nodeset?ref=main + ## It's possible to replace ../../../../../va/nvidia-mdev/edpm-post-driver/nodeset/ with a git checkout URL as per: + ## https://github.com/kubernetes-sigs/kustomize/blob/master/examples/remoteBuild.md + +resources: + - values.yaml diff --git a/examples/va/nvidia-mdev/edpm-post-driver/nodeset/values.yaml b/examples/va/nvidia-mdev/edpm-post-driver/nodeset/values.yaml new file mode 100644 index 000000000..8a8e75afb --- /dev/null +++ b/examples/va/nvidia-mdev/edpm-post-driver/nodeset/values.yaml @@ -0,0 +1,148 @@ +# yamllint disable rule:line-length +# local-config: referenced, but not emitted by kustomize +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: edpm-post-driver-nodeset-values + annotations: + config.kubernetes.io/local-config: "true" +data: + root_password: cmVkaGF0Cg== + preProvisioned: false + baremetalSetTemplate: + ctlplaneInterface: eno2 # CHANGEME + cloudUserName: cloud-admin + provisioningInterface: enp1s0 # CHANGEME + bmhLabelSelector: + app: openstack # CHANGEME + passwordSecret: + name: baremetalset-password-secret + namespace: openstack + ssh_keys: + # Authorized keys that will have access to the dataplane computes via SSH + authorized: CHANGEME + # The private key that will have access to the dataplane computes via SSH + private: CHANGEME2 + # The public key that will have access to the dataplane computes via SSH + public: CHANGEME3 + nodeset: + ansible: + ansibleUser: cloud-admin + ansiblePort: 22 + ansibleVars: + # CHANGEME -- see https://access.redhat.com/solutions/253273 + # edpm_bootstrap_command: | + # subscription-manager register --username --password + # podman login -u -p registry.redhat.io + timesync_ntp_servers: + - hostname: pool.ntp.org + # CPU pinning settings + edpm_kernel_args: "default_hugepagesz=1GB hugepagesz=1G hugepages=16 intel_iommu=on iommu=pt isolcpus=4-23,28-47" + edpm_tuned_profile: "cpu-partitioning-powersave" + edpm_tuned_isolated_cores: "4-23,28-47" + # edpm_network_config + # These vars are edpm_network_config role vars + edpm_network_config_hide_sensitive_logs: false + edpm_network_config_os_net_config_mappings: + edpm-compute-0: + nic2: 6c:fe:54:3f:8a:02 # CHANGEME + nic3: 6c:fe:54:3f:8a:03 # CHANGEME + edpm-compute-1: + nic2: 6b:fe:54:3f:8a:02 # CHANGEME + nic3: 6b:fe:54:3f:8a:03 # CHANGEME + edpm_network_config_template: | + --- + {% set mtu_list = [ctlplane_mtu] %} + {% for network in nodeset_networks %} + {{ mtu_list.append(lookup('vars', networks_lower[network] ~ '_mtu')) }} + {%- endfor %} + {% set min_viable_mtu = mtu_list | max %} + network_config: + - type: ovs_bridge + name: {{ neutron_physical_bridge_name }} + mtu: {{ min_viable_mtu }} + use_dhcp: false + dns_servers: {{ ctlplane_dns_nameservers }} + domain: {{ dns_search_domains }} + addresses: + - ip_netmask: {{ ctlplane_ip }}/{{ ctlplane_cidr }} + routes: {{ ctlplane_host_routes }} + members: + - type: interface + name: nic2 + mtu: {{ min_viable_mtu }} + # force the MAC address of the bridge to this interface + primary: true + {% for network in nodeset_networks %} + - type: vlan + mtu: {{ lookup('vars', networks_lower[network] ~ '_mtu') }} + vlan_id: {{ lookup('vars', networks_lower[network] ~ '_vlan_id') }} + addresses: + - ip_netmask: + {{ lookup('vars', networks_lower[network] ~ '_ip') }}/{{ lookup('vars', networks_lower[network] ~ '_cidr') }} + routes: {{ lookup('vars', networks_lower[network] ~ '_host_routes') }} + {% endfor %} + - type: sriov_pf + name: nic3 + numvfs: 10 + use_dhcp: false + promisc: true + + # These vars are for the network config templates themselves and are + # considered EDPM network defaults. + neutron_physical_bridge_name: br-ex + neutron_public_interface_name: eth0 + # edpm_nodes_validation + edpm_nodes_validation_validate_controllers_icmp: false + edpm_nodes_validation_validate_gateway_icmp: false + dns_search_domains: [] + gather_facts: false + # edpm firewall, change the allowed CIDR if needed + edpm_sshd_configure_firewall: true + edpm_sshd_allowed_ranges: + - 192.168.122.0/24 + # SRIOV settings + edpm_neutron_sriov_agent_SRIOV_NIC_physical_device_mappings: 'sriov-phy4:eno4' + networks: + - defaultRoute: true + name: ctlplane + subnetName: subnet1 + - name: internalapi + subnetName: subnet1 + - name: storage + subnetName: subnet1 + - name: tenant + subnetName: subnet1 + nodes: + edpm-compute-0: + hostName: edpm-compute-0 + edpm-compute-1: + hostName: edpm-compute-1 + services: + - neutron-ovn + - nova-custom-sriov + - neutron-sriov + - neutron-metadata + nova: + compute: + conf: | + # CHANGEME + [DEFAULT] + reserved_host_memory_mb = 4096 + reserved_huge_pages = node:0,size:4,count:524160 + reserved_huge_pages = node:1,size:4,count:524160 + [compute] + cpu_shared_set = 0-3,24-27 + cpu_dedicated_set = 8-23,32-47 + [devices] + mdev_enabled_types = nvidia-268 + migration: + ssh_keys: + private: CHANGEME4 + public: CHANGEME5 + pci: + conf: | + # CHANGEME + [pci] + device_spec = {"vendor_id":"8086", "product_id":"1572", "address": "0000:19:00.3", "physical_network":"sriov-phy4", "trusted":"true"} diff --git a/examples/va/nvidia-mdev/edpm/nodeset/values.yaml b/examples/va/nvidia-mdev/edpm/nodeset/values.yaml index d7a2f949a..ae949ff78 100644 --- a/examples/va/nvidia-mdev/edpm/nodeset/values.yaml +++ b/examples/va/nvidia-mdev/edpm/nodeset/values.yaml @@ -120,41 +120,7 @@ data: edpm-compute-1: hostName: edpm-compute-1 services: - - bootstrap - - download-cache - - configure-network - - validate-network - - install-os - - configure-os - - ssh-known-hosts - - run-os - - reboot-os - - install-certs - - libvirt - - ovn - neutron-ovn - nova-custom-sriov - neutron-sriov - neutron-metadata - nova: - compute: - conf: | - # CHANGEME - [DEFAULT] - reserved_host_memory_mb = 4096 - reserved_huge_pages = node:0,size:4,count:524160 - reserved_huge_pages = node:1,size:4,count:524160 - [compute] - cpu_shared_set = 0-3,24-27 - cpu_dedicated_set = 8-23,32-47 - [devices] - mdev_enabled_types = nvidia-268 - migration: - ssh_keys: - private: CHANGEME4 - public: CHANGEME5 - pci: - conf: | - # CHANGEME - [pci] - device_spec = {"vendor_id":"8086", "product_id":"1572", "address": "0000:19:00.3", "physical_network":"sriov-phy4", "trusted":"true"} diff --git a/va/nvidia-mdev/edpm-post-driver/deployment/kustomization.yaml b/va/nvidia-mdev/edpm-post-driver/deployment/kustomization.yaml new file mode 100644 index 000000000..227bcc100 --- /dev/null +++ b/va/nvidia-mdev/edpm-post-driver/deployment/kustomization.yaml @@ -0,0 +1,21 @@ +--- +apiVersion: kustomize.config.k8s.io/v1alpha1 +kind: Component + +transformers: + # Set namespace to OpenStack on all namespaced objects without a namespace + - |- + apiVersion: builtin + kind: NamespaceTransformer + metadata: + name: _ignored_ + namespace: openstack + setRoleBindingSubjects: none + unsetOnly: true + fieldSpecs: + - path: metadata/name + kind: Namespace + create: true + +components: + - ../../../../lib/dataplane/deployment diff --git a/va/nvidia-mdev/edpm-post-driver/nodeset/baremetalset-password-secret.yaml b/va/nvidia-mdev/edpm-post-driver/nodeset/baremetalset-password-secret.yaml new file mode 100644 index 000000000..41daad38d --- /dev/null +++ b/va/nvidia-mdev/edpm-post-driver/nodeset/baremetalset-password-secret.yaml @@ -0,0 +1,9 @@ +--- +apiVersion: v1 +data: + NodeRootPassword: _replaced_ +kind: Secret +metadata: + name: baremetalset-password-secret + namespace: openstack +type: Opaque diff --git a/va/nvidia-mdev/edpm-post-driver/nodeset/kustomization.yaml b/va/nvidia-mdev/edpm-post-driver/nodeset/kustomization.yaml new file mode 100644 index 000000000..af767ccb2 --- /dev/null +++ b/va/nvidia-mdev/edpm-post-driver/nodeset/kustomization.yaml @@ -0,0 +1,66 @@ +--- +apiVersion: kustomize.config.k8s.io/v1alpha1 +kind: Component + +transformers: + # Set namespace to OpenStack on all namespaced objects without a namespace + - |- + apiVersion: builtin + kind: NamespaceTransformer + metadata: + name: _ignored_ + namespace: openstack + setRoleBindingSubjects: none + unsetOnly: true + fieldSpecs: + - path: metadata/name + kind: Namespace + create: true + +components: + - ../../../../lib/dataplane/nodeset + +resources: + - baremetalset-password-secret.yaml + - nova_sriov.yaml + +replacements: + - source: + kind: ConfigMap + name: edpm-nodeset-values + fieldPath: data.root_password + targets: + - select: + kind: Secret + name: baremetalset-password-secret + fieldPaths: + - data.NodeRootPassword + options: + create: true + + # Nova compute CPU pinning customization + - source: + kind: ConfigMap + name: edpm-nodeset-values + fieldPath: data.nova.compute.conf + targets: + - select: + kind: ConfigMap + name: cpu-pinning-nova + fieldPaths: + - data.25-cpu-pinning-nova\.conf + options: + create: true + # Nova compute PCI passthrough customization + - source: + kind: ConfigMap + name: edpm-nodeset-values + fieldPath: data.nova.pci.conf + targets: + - select: + kind: ConfigMap + name: sriov-nova + fieldPaths: + - data.03-sriov-nova\.conf + options: + create: true diff --git a/va/nvidia-mdev/edpm/nodeset/nova_sriov.yaml b/va/nvidia-mdev/edpm-post-driver/nodeset/nova_sriov.yaml similarity index 100% rename from va/nvidia-mdev/edpm/nodeset/nova_sriov.yaml rename to va/nvidia-mdev/edpm-post-driver/nodeset/nova_sriov.yaml diff --git a/va/nvidia-mdev/edpm/nodeset/kustomization.yaml b/va/nvidia-mdev/edpm/nodeset/kustomization.yaml index 0c4325864..0d10ffa9f 100644 --- a/va/nvidia-mdev/edpm/nodeset/kustomization.yaml +++ b/va/nvidia-mdev/edpm/nodeset/kustomization.yaml @@ -22,7 +22,6 @@ components: resources: - baremetalset-password-secret.yaml - - nova_sriov.yaml replacements: - source: @@ -37,33 +36,6 @@ replacements: - data.NodeRootPassword options: create: true - - # Nova compute CPU pinning customization - - source: - kind: ConfigMap - name: edpm-nodeset-values - fieldPath: data.nova.compute.conf - targets: - - select: - kind: ConfigMap - name: cpu-pinning-nova - fieldPaths: - - data.25-cpu-pinning-nova\.conf - options: - create: true - # Nova compute PCI passthrough customization - - source: - kind: ConfigMap - name: edpm-nodeset-values - fieldPath: data.nova.pci.conf - targets: - - select: - kind: ConfigMap - name: sriov-nova - fieldPaths: - - data.03-sriov-nova\.conf - options: - create: true - source: kind: ConfigMap name: edpm-nodeset-values