Skip to content

Commit

Permalink
[Draft] Split nvidia-mdev arch in two stages
Browse files Browse the repository at this point in the history
  • Loading branch information
sbauza committed Oct 10, 2024
1 parent 2944874 commit 10a6062
Show file tree
Hide file tree
Showing 14 changed files with 311 additions and 64 deletions.
31 changes: 29 additions & 2 deletions automation/vars/nvidia-mdev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ vas:
src_file: values.yaml
build_output: nodeset.yaml
post_stage_run:
- name: Install nvidia driver
- name: Run phase 1 playbook
type: playbook
# As a reminder, the job needs to set the nvidia driver URL
source: "../../playbooks/nvidia-mdev.yml"
source: "../../playbooks/nvidia-mdev-phase1.yml"
inventory: "${HOME}/ci-framework-data/artifacts/zuul_inventory.yml"

- path: examples/va/nvidia-mdev/edpm/deployment
Expand All @@ -53,3 +53,30 @@ vas:
- name: edpm-deployment-values
src_file: values.yaml
build_output: deployment.yaml

# Post-driver nodeset stage: built from the edpm-post-driver/nodeset example.
- path: examples/va/nvidia-mdev/edpm-post-driver/nodeset
  # Wait until the openstack-edpm nodeset reports SetupReady (up to 60 minutes).
  wait_conditions:
    - >-
      oc -n openstack wait
      osdpns openstack-edpm --for condition=SetupReady
      --timeout=60m
  values:
    - name: edpm-post-driver-nodeset-values
      src_file: values.yaml
  build_output: nodeset-post-driver.yaml
  # Phase 2 playbook, run against the Zuul inventory.
  post_stage_run:
    - name: Run phase 2 playbook
      type: playbook
      source: "../../playbooks/nvidia-mdev-phase2.yml"
      inventory: "${HOME}/ci-framework-data/artifacts/zuul_inventory.yml"

# Post-driver deployment stage: built from the edpm-post-driver/deployment example.
- path: examples/va/nvidia-mdev/edpm-post-driver/deployment
  # Wait until the openstack-edpm nodeset reports Ready (up to 60 minutes).
  wait_conditions:
    - >-
      oc -n openstack wait
      osdpns openstack-edpm --for condition=Ready
      --timeout=60m
  values:
    - name: edpm-post-driver-deployment-values
      src_file: values.yaml
  build_output: deployment-post-driver.yaml
2 changes: 2 additions & 0 deletions examples/va/nvidia-mdev/edpm-post-driver/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
dataplane-deployment.yaml
dataplane-nodeset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
dataplane-deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# Component providing the post-driver deployment, taken from the in-repo va/ tree.
components:
  - ../../../../../va/nvidia-mdev/edpm-post-driver/deployment
  # - https://github.com/openstack-k8s-operators/architecture/va/nvidia-mdev/edpm-post-driver/deployment?ref=main
  ## The relative path ../../../../../va/nvidia-mdev/edpm-post-driver/deployment/
  ## can be replaced with a git checkout URL (see the commented entry above), as per:
  ## https://github.com/kubernetes-sigs/kustomize/blob/master/examples/remoteBuild.md

# Local values file for this example.
resources:
  - values.yaml
10 changes: 10 additions & 0 deletions examples/va/nvidia-mdev/edpm-post-driver/deployment/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# yamllint disable rule:line-length
# local-config: referenced, but not emitted by kustomize
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: edpm-deployment-values
  annotations:
    # Marks this ConfigMap as a local-config object (see the note at the top of the file).
    config.kubernetes.io/local-config: "true"
# No values are set for this stage.
data: {}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
dataplane-nodeset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# Component providing the post-driver nodeset, taken from the in-repo va/ tree.
components:
  - ../../../../../va/nvidia-mdev/edpm-post-driver/nodeset
  # - https://github.com/openstack-k8s-operators/architecture/va/nvidia-mdev/edpm-post-driver/nodeset?ref=main
  ## The relative path ../../../../../va/nvidia-mdev/edpm-post-driver/nodeset/
  ## can be replaced with a git checkout URL (see the commented entry above), as per:
  ## https://github.com/kubernetes-sigs/kustomize/blob/master/examples/remoteBuild.md

# Local values file for this example.
resources:
  - values.yaml
148 changes: 148 additions & 0 deletions examples/va/nvidia-mdev/edpm-post-driver/nodeset/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# yamllint disable rule:line-length
# local-config: referenced, but not emitted by kustomize
#
# Values for the post-driver EDPM nodeset. Entries marked CHANGEME are
# placeholders that must be adapted to the target environment.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: edpm-nodeset-values
  annotations:
    config.kubernetes.io/local-config: "true"
data:
  # Base64-encoded value.
  root_password: cmVkaGF0Cg==
  preProvisioned: false
  baremetalSetTemplate:
    ctlplaneInterface: eno2 # CHANGEME
    cloudUserName: cloud-admin
    provisioningInterface: enp1s0 # CHANGEME
    bmhLabelSelector:
      app: openstack # CHANGEME
    passwordSecret:
      name: baremetalset-password-secret
      namespace: openstack
  ssh_keys:
    # Authorized keys that will have access to the dataplane computes via SSH
    authorized: CHANGEME
    # The private key that will have access to the dataplane computes via SSH
    private: CHANGEME2
    # The public key that will have access to the dataplane computes via SSH
    public: CHANGEME3
  nodeset:
    ansible:
      ansibleUser: cloud-admin
      ansiblePort: 22
      ansibleVars:
        # CHANGEME -- see https://access.redhat.com/solutions/253273
        # edpm_bootstrap_command: |
        #       subscription-manager register --username <subscription_manager_username> --password <subscription_manager_password>
        #       podman login -u <registry_username> -p <registry_password> registry.redhat.io
        timesync_ntp_servers:
          - hostname: pool.ntp.org
        # CPU pinning settings
        edpm_kernel_args: "default_hugepagesz=1GB hugepagesz=1G hugepages=16 intel_iommu=on iommu=pt isolcpus=4-23,28-47"
        edpm_tuned_profile: "cpu-partitioning-powersave"
        edpm_tuned_isolated_cores: "4-23,28-47"
        # edpm_network_config
        # These vars are edpm_network_config role vars
        edpm_network_config_hide_sensitive_logs: false
        # Per-node NIC alias to MAC address mappings.
        edpm_network_config_os_net_config_mappings:
          edpm-compute-0:
            nic2: 6c:fe:54:3f:8a:02 # CHANGEME
            nic3: 6c:fe:54:3f:8a:03 # CHANGEME
          edpm-compute-1:
            nic2: 6b:fe:54:3f:8a:02 # CHANGEME
            nic3: 6b:fe:54:3f:8a:03 # CHANGEME
        # Jinja2 template; kept as a literal block so it is passed through as-is.
        edpm_network_config_template: |
          ---
          {% set mtu_list = [ctlplane_mtu] %}
          {% for network in nodeset_networks %}
          {{ mtu_list.append(lookup('vars', networks_lower[network] ~ '_mtu')) }}
          {%- endfor %}
          {% set min_viable_mtu = mtu_list | max %}
          network_config:
          - type: ovs_bridge
            name: {{ neutron_physical_bridge_name }}
            mtu: {{ min_viable_mtu }}
            use_dhcp: false
            dns_servers: {{ ctlplane_dns_nameservers }}
            domain: {{ dns_search_domains }}
            addresses:
            - ip_netmask: {{ ctlplane_ip }}/{{ ctlplane_cidr }}
            routes: {{ ctlplane_host_routes }}
            members:
            - type: interface
              name: nic2
              mtu: {{ min_viable_mtu }}
              # force the MAC address of the bridge to this interface
              primary: true
          {% for network in nodeset_networks %}
            - type: vlan
              mtu: {{ lookup('vars', networks_lower[network] ~ '_mtu') }}
              vlan_id: {{ lookup('vars', networks_lower[network] ~ '_vlan_id') }}
              addresses:
              - ip_netmask:
                  {{ lookup('vars', networks_lower[network] ~ '_ip') }}/{{ lookup('vars', networks_lower[network] ~ '_cidr') }}
              routes: {{ lookup('vars', networks_lower[network] ~ '_host_routes') }}
          {% endfor %}
          - type: sriov_pf
            name: nic3
            numvfs: 10
            use_dhcp: false
            promisc: true
        # These vars are for the network config templates themselves and are
        # considered EDPM network defaults.
        neutron_physical_bridge_name: br-ex
        neutron_public_interface_name: eth0
        # edpm_nodes_validation
        edpm_nodes_validation_validate_controllers_icmp: false
        edpm_nodes_validation_validate_gateway_icmp: false
        dns_search_domains: []
        gather_facts: false
        # edpm firewall, change the allowed CIDR if needed
        edpm_sshd_configure_firewall: true
        edpm_sshd_allowed_ranges:
          - 192.168.122.0/24
        # SRIOV settings
        edpm_neutron_sriov_agent_SRIOV_NIC_physical_device_mappings: 'sriov-phy4:eno4'
    networks:
      - defaultRoute: true
        name: ctlplane
        subnetName: subnet1
      - name: internalapi
        subnetName: subnet1
      - name: storage
        subnetName: subnet1
      - name: tenant
        subnetName: subnet1
    nodes:
      edpm-compute-0:
        hostName: edpm-compute-0
      edpm-compute-1:
        hostName: edpm-compute-1
    # Services listed for this post-driver nodeset.
    services:
      - neutron-ovn
      - nova-custom-sriov
      - neutron-sriov
      - neutron-metadata
  nova:
    compute:
      conf: |
        # CHANGEME
        [DEFAULT]
        reserved_host_memory_mb = 4096
        reserved_huge_pages = node:0,size:4,count:524160
        reserved_huge_pages = node:1,size:4,count:524160
        [compute]
        cpu_shared_set = 0-3,24-27
        cpu_dedicated_set = 8-23,32-47
        [devices]
        mdev_enabled_types = nvidia-268
    migration:
      ssh_keys:
        private: CHANGEME4
        public: CHANGEME5
    pci:
      conf: |
        # CHANGEME
        [pci]
        device_spec = {"vendor_id":"8086", "product_id":"1572", "address": "0000:19:00.3", "physical_network":"sriov-phy4", "trusted":"true"}
34 changes: 0 additions & 34 deletions examples/va/nvidia-mdev/edpm/nodeset/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -120,41 +120,7 @@ data:
edpm-compute-1:
hostName: edpm-compute-1
services:
- bootstrap
- download-cache
- configure-network
- validate-network
- install-os
- configure-os
- ssh-known-hosts
- run-os
- reboot-os
- install-certs
- libvirt
- ovn
- neutron-ovn
- nova-custom-sriov
- neutron-sriov
- neutron-metadata
nova:
compute:
conf: |
# CHANGEME
[DEFAULT]
reserved_host_memory_mb = 4096
reserved_huge_pages = node:0,size:4,count:524160
reserved_huge_pages = node:1,size:4,count:524160
[compute]
cpu_shared_set = 0-3,24-27
cpu_dedicated_set = 8-23,32-47
[devices]
mdev_enabled_types = nvidia-268
migration:
ssh_keys:
private: CHANGEME4
public: CHANGEME5
pci:
conf: |
# CHANGEME
[pci]
device_spec = {"vendor_id":"8086", "product_id":"1572", "address": "0000:19:00.3", "physical_network":"sriov-phy4", "trusted":"true"}
21 changes: 21 additions & 0 deletions va/nvidia-mdev/edpm-post-driver/deployment/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
---
# Kustomize Component for the post-driver deployment stage.
apiVersion: kustomize.config.k8s.io/v1alpha1
kind: Component

transformers:
  # Set namespace to OpenStack on all namespaced objects without a namespace
  - |-
    apiVersion: builtin
    kind: NamespaceTransformer
    metadata:
      name: _ignored_
      namespace: openstack
    setRoleBindingSubjects: none
    unsetOnly: true
    fieldSpecs:
      - path: metadata/name
        kind: Namespace
        create: true

# Shared dataplane deployment component from the repository's lib/ tree.
components:
  - ../../../../lib/dataplane/deployment
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
apiVersion: v1
data:
  # Placeholder value; intended to be overwritten at build time.
  NodeRootPassword: _replaced_
kind: Secret
metadata:
  name: baremetalset-password-secret
  namespace: openstack
type: Opaque
66 changes: 66 additions & 0 deletions va/nvidia-mdev/edpm-post-driver/nodeset/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
---
# Kustomize Component for the post-driver nodeset stage.
apiVersion: kustomize.config.k8s.io/v1alpha1
kind: Component

transformers:
  # Set namespace to OpenStack on all namespaced objects without a namespace
  - |-
    apiVersion: builtin
    kind: NamespaceTransformer
    metadata:
      name: _ignored_
      namespace: openstack
    setRoleBindingSubjects: none
    unsetOnly: true
    fieldSpecs:
      - path: metadata/name
        kind: Namespace
        create: true

# Shared dataplane nodeset component from the repository's lib/ tree.
components:
  - ../../../../lib/dataplane/nodeset

resources:
  - baremetalset-password-secret.yaml
  - nova_sriov.yaml

# Each replacement copies one field of the edpm-nodeset-values ConfigMap into
# the selected target object, creating the target field if it does not exist.
replacements:
  # Root password for the baremetal set password Secret
  - source:
      kind: ConfigMap
      name: edpm-nodeset-values
      fieldPath: data.root_password
    targets:
      - select:
          kind: Secret
          name: baremetalset-password-secret
        fieldPaths:
          - data.NodeRootPassword
        options:
          create: true

  # Nova compute CPU pinning customization
  - source:
      kind: ConfigMap
      name: edpm-nodeset-values
      fieldPath: data.nova.compute.conf
    targets:
      - select:
          kind: ConfigMap
          name: cpu-pinning-nova
        fieldPaths:
          # The dot in the key name is escaped so it is not read as a path separator.
          - data.25-cpu-pinning-nova\.conf
        options:
          create: true
  # Nova compute PCI passthrough customization
  - source:
      kind: ConfigMap
      name: edpm-nodeset-values
      fieldPath: data.nova.pci.conf
    targets:
      - select:
          kind: ConfigMap
          name: sriov-nova
        fieldPaths:
          # The dot in the key name is escaped so it is not read as a path separator.
          - data.03-sriov-nova\.conf
        options:
          create: true
File renamed without changes.
Loading

0 comments on commit 10a6062

Please sign in to comment.