From b317ad3729eea609910634ce3d8b2e43fb01588f Mon Sep 17 00:00:00 2001 From: David Vallee Delisle Date: Thu, 10 Feb 2022 13:34:18 -0500 Subject: [PATCH] Adding Hugepages role parameter Hugepages management was always a manual step done by operators via the TripleO parameter ``KernelArgs``. This is error-prone and causes confusion. The new ``Hugepages`` parameter allows operators to define hugepages as a dictionary, making it easier to read and follow. To prevent involuntary changes, there are multiple validations before applying a change: - We convert the current running configurations to an actual dictionary that we validate the new format against - If no change is necessary, even though the format might not be the same, there's no kernel_args update. - By default, we don't remove hugepages in place except when operators specifically set the ``ReconfigureHugepages`` to true. This change is also opening the door to more automations and automatic tuning. Related: https://bugzilla.redhat.com/show_bug.cgi?id=2043588 Depends-On: https://review.opendev.org/c/openstack/tripleo-ansible/+/830869 Change-Id: I1e05a5ea17c858a86acc170cfb91288884664b05 --- .../kernel-boot-params-baremetal-ansible.yaml | 33 +++++++++++++++++++ ...leo-kernel-hugepages-424c19a4b1579af8.yaml | 23 +++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 releasenotes/notes/tripleo-kernel-hugepages-424c19a4b1579af8.yaml diff --git a/deployment/kernel/kernel-boot-params-baremetal-ansible.yaml b/deployment/kernel/kernel-boot-params-baremetal-ansible.yaml index a042a46a73..c20edf40b9 100644 --- a/deployment/kernel/kernel-boot-params-baremetal-ansible.yaml +++ b/deployment/kernel/kernel-boot-params-baremetal-ansible.yaml @@ -67,6 +67,33 @@ parameters: default: 900 type: number description: Timeout in seconds to specify the wait time for ansible node reboot + Hugepages: + default: {} + type: json + description: > + Hash for hugepage configuration + Example: + Hugepages: + 2048: + count: 100 + 
default: false + 1048576: + count: 100 + default: true + tags: + - role_specific + ReconfigureHugepages: + default: false + type: boolean + description: > + By default, tripleo-kernel will configure the provided Hugepages as specified by operators + but it won't try to remove Hugepages configured on the host that are not part of the TripleO + parameter. Enabling this setting will make sure everything is reconfigured exactly like the + TripleO parameter, otherwise we just add the content of the Hugepages parameter to what's + already in place. + tags: + - role_specific + resources: RoleParametersValue: @@ -80,12 +107,16 @@ resources: isolated_cores: IsolCpusList kernel_args: KernelArgs kernel_args_defer_reboot: KernelArgsDeferReboot + hugepages: Hugepages + reconfigure_hugepages: ReconfigureHugepages - values: {get_param: [RoleParameters]} - values: TunedProfileName: {get_param: TunedProfileName} IsolCpusList: {get_param: IsolCpusList} KernelArgs: {get_param: KernelArgs} KernelArgsDeferReboot: {get_param: KernelArgsDeferReboot} + Hugepages: {get_param: Hugepages} + ReconfigureHugepages: {get_param: ReconfigureHugepages} outputs: role_data: @@ -109,6 +140,8 @@ outputs: tasks_from: kernelargs.yml vars: tripleo_kernel_args: {get_attr: [RoleParametersValue, value, kernel_args]} + tripleo_kernel_hugepages: {get_attr: [RoleParametersValue, value, hugepages]} + tripleo_kernel_hugepages_remove: {get_attr: [RoleParametersValue, value, reconfigure_hugepages]} tripleo_kernel_defer_reboot: {get_attr: [RoleParametersValue, value, kernel_args_defer_reboot]} tripleo_kernel_reboot_timeout: {get_param: NodeRebootWaitTimeout} upgrade_tasks: diff --git a/releasenotes/notes/tripleo-kernel-hugepages-424c19a4b1579af8.yaml b/releasenotes/notes/tripleo-kernel-hugepages-424c19a4b1579af8.yaml new file mode 100644 index 0000000000..558878635a --- /dev/null +++ b/releasenotes/notes/tripleo-kernel-hugepages-424c19a4b1579af8.yaml @@ -0,0 +1,23 @@ +--- +features: + - | + Adding Hugepages role 
parameter + + Hugepages management was always a manual step done by operators via the + TripleO parameter ``KernelArgs``. This is error-prone and causes confusion. + + The new ``Hugepages`` parameter allows operators to define hugepages as a + dictionary, making it easier to read and follow. + + To prevent involuntary changes, there are multiple validations before + applying a change: + + - We convert the current running configurations to an actual dictionary + that we validate the new format against + - If no change is necessary, even though the format might not be the same, + there's no kernel_args update. + - By default, we don't remove hugepages in place except when operators + specifically set the ``ReconfigureHugepages`` to true. + + This change is also opening the door to more automations and automatic + tuning.