Skip to content

Commit

Permalink
Merge pull request #341 from stackhpc/feat/base-RL89
Browse files Browse the repository at this point in the history
Update fatimage base to RL8.9 with robust volume mounts
  • Loading branch information
sjpb authored Dec 14, 2023
2 parents 44608ac + 9b34524 commit 1e68a91
Show file tree
Hide file tree
Showing 10 changed files with 17 additions and 72 deletions.
23 changes: 0 additions & 23 deletions ansible/roles/cluster_infra/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,29 +57,6 @@
- terraform_state == "present"
- cluster_upgrade_system_packages is not defined or not cluster_upgrade_system_packages

# Pick the device-name prefix for attached volumes from the metadata of the
# image the cluster uses: 'sd' when the image declares an hw_scsi_model that is
# listed in scsi_models, otherwise 'vd'. The previous image is preferred over
# cluster_image when cluster_previous_image is defined.
- name: Detect volume device prefix from image metadata
  block:
    - name: Get image metadata from OpenStack API
      openstack.cloud.image_info:
        image: "{{ cluster_previous_image | default(cluster_image) }}"
      register: cluster_image_info

    # The lookup can return zero matches as well as several, so the message
    # reports the actual count and the image name that was really queried.
    - name: Check exactly one image found
      assert:
        that: cluster_image_info.images | length == 1
        fail_msg: >-
          Expected exactly one image matching
          '{{ cluster_previous_image | default(cluster_image) }}'
          but found {{ cluster_image_info.images | length }}

    - name: Set block_device_prefix fact
      set_fact:
        block_device_prefix: >-
          {{
            'sd' if (cluster_image_info.images | first).hw_scsi_model is defined and
            (cluster_image_info.images | first).hw_scsi_model in scsi_models
            else 'vd'
          }}
  # Only run when block_device_prefix isn't set as an extravar
  when:
    - block_device_prefix is not defined
    - cluster_image is defined

- name: Template Terraform files into project directory
template:
src: >-
Expand Down
17 changes: 6 additions & 11 deletions ansible/roles/cluster_infra/templates/resources.tf.j2
Original file line number Diff line number Diff line change
Expand Up @@ -358,18 +358,13 @@ resource "openstack_compute_instance_v2" "control" {
{%- for ssh_key in cluster_deploy_ssh_keys_extra %}
- {{ ssh_key }}
{%- endfor %}
fs_setup:
- label: state
filesystem: ext4
device: /dev/{{ block_device_prefix }}b
partition: auto
- label: home
filesystem: ext4
device: /dev/{{ block_device_prefix }}c
partition: auto
bootcmd:
%{for volume in [openstack_blockstorage_volume_v3.state, openstack_blockstorage_volume_v3.home]}
- BLKDEV=$(readlink -f $(ls /dev/disk/by-id/*${substr(volume.id, 0, 20)}* | head -n1 )); blkid -o value -s TYPE $BLKDEV || mke2fs -t ext4 -L ${lower(split(" ", volume.description)[0])} $BLKDEV
%{endfor}
mounts:
- [LABEL=state, /var/lib/state, auto, "x-systemd.required-by=nfs-server.service,x-systemd.before=nfs-server.service"]
- [LABEL=home, /exports/home, auto, "x-systemd.required-by=nfs-server.service,x-systemd.before=nfs-server.service"]
- [LABEL=state, {{ appliances_state_dir }}, auto]
- [LABEL=home, /exports/home, auto]
EOF
}

Expand Down
2 changes: 1 addition & 1 deletion environments/.stackhpc/ARCUS.pkrvars.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ volume_size = 10 # GB
image_disk_format = "qcow2"
networks = ["4b6b2722-ee5b-40ec-8e52-a6610e14cc51"] # portal-internal (DNS broken on ilab-60)
source_image_name = "openhpc-230804-1754-80b8d714" # https://github.com/stackhpc/ansible-slurm-appliance/pull/298
fatimage_source_image_name = "Rocky-8-GenericCloud-Base-8.8-20230518.0.x86_64.qcow2"
fatimage_source_image_name = "Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qcow2"
ssh_keypair_name = "slurm-app-ci"
ssh_private_key_file = "~/.ssh/id_rsa"
security_groups = ["default", "SSH"]
Expand Down
2 changes: 1 addition & 1 deletion environments/.stackhpc/SMS.pkrvars.hcl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
flavor = "general.v1.tiny"
networks = ["26023e3d-bc8e-459c-8def-dbd47ab01756"] # stackhpc-ipv4-geneve
source_image_name = "openhpc-230503-0944-bf8c3f63" # https://github.com/stackhpc/ansible-slurm-appliance/pull/252
fatimage_source_image_name = "Rocky-8-GenericCloud-Base-8.8-20230518.0.x86_64.qcow2"
fatimage_source_image_name = "Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qcow2"
ssh_keypair_name = "slurm-app-ci"
ssh_private_key_file = "~/.ssh/id_rsa"
security_groups = ["default", "SSH"]
Expand Down
2 changes: 0 additions & 2 deletions environments/.stackhpc/terraform/ARCUS.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,3 @@ cluster_subnet = "portal-internal"
vnic_type = "normal"
control_node_flavor = "vm.ska.cpu.general.quarter"
other_node_flavor = "vm.ska.cpu.general.small"
state_volume_device_path = "/dev/sdb"
home_volume_device_path = "/dev/sdc"
2 changes: 0 additions & 2 deletions environments/.stackhpc/terraform/SMS.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,3 @@ cluster_subnet = "stackhpc-ipv4-geneve-subnet"
vnic_type = "normal"
control_node_flavor = "general.v1.medium"
other_node_flavor = "general.v1.tiny"
state_volume_device_path = "/dev/vdb"
home_volume_device_path = "/dev/vdc"
10 changes: 2 additions & 8 deletions environments/.stackhpc/terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ variable "cluster_name" {
# Name of the single image booted on every node of the CI cluster.
# The commented-out Rocky GenericCloud name matches fatimage_source_image_name
# in the CI pkrvars files; presumably it is kept so CI can be pointed at the
# unmodified base image - TODO confirm.
variable "cluster_image" {
description = "single image for all cluster nodes - a convenience for CI"
type = string
default = "openhpc-231206-1648-9d6aa4e4" # https://github.com/stackhpc/ansible-slurm-appliance/pull/340
# default = "Rocky-8-GenericCloud-Base-8.8-20230518.0.x86_64.qcow2"
default = "openhpc-231208-1207-b69af6e2" # https://github.com/stackhpc/ansible-slurm-appliance/pull/341
# default = "Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qcow2"
}

variable "cluster_net" {}
Expand All @@ -31,10 +31,6 @@ variable "volume_backed_instances" {
default = false
}

variable "state_volume_device_path" {}

variable "home_volume_device_path" {}

module "cluster" {
source = "../../skeleton/{{cookiecutter.environment}}/terraform/"

Expand Down Expand Up @@ -76,6 +72,4 @@ module "cluster" {
state_volume_size = 10
home_volume_size = 20

state_volume_device_path = var.state_volume_device_path
home_volume_device_path = var.home_volume_device_path
}
Original file line number Diff line number Diff line change
Expand Up @@ -126,19 +126,14 @@ resource "openstack_compute_instance_v2" "control" {
#cloud-config
fqdn: ${var.cluster_name}-${each.key}.${var.cluster_name}.${var.cluster_domain_suffix}
fs_setup:
- label: state
filesystem: ext4
device: ${var.state_volume_device_path}
partition: auto
- label: home
filesystem: ext4
device: ${var.home_volume_device_path}
partition: auto
bootcmd:
%{for volume in [openstack_blockstorage_volume_v3.state, openstack_blockstorage_volume_v3.home]}
- BLKDEV=$(readlink -f $(ls /dev/disk/by-id/*${substr(volume.id, 0, 20)}* | head -n1 )); blkid -o value -s TYPE $BLKDEV || mke2fs -t ext4 -L ${lower(split(" ", volume.description)[0])} $BLKDEV
%{endfor}
mounts:
- [LABEL=state, ${var.state_dir}]
- [LABEL=home, /exports/home, auto, "x-systemd.required-by=nfs-server.service,x-systemd.before=nfs-server.service"]
- [LABEL=home, /exports/home]
EOF

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,6 @@ variable "environment_root" {
description = "Path to environment root, automatically set by activate script"
}

# Device path of the block device that backs the state volume; consumed by the
# control node's cloud-config when creating the "state" filesystem.
variable "state_volume_device_path" {
  description = "Path to block device for state"
  type        = string
  default     = "/dev/sdb"
}

# Device path of the block device that backs the home volume; consumed by the
# control node's cloud-config when creating the "home" filesystem.
variable "home_volume_device_path" {
  description = "Path to block device name for home directories"
  type        = string
  default     = "/dev/sdc"
}

variable "state_dir" {
type = string
description = "Path to state directory on control node"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# Volume holding control-node state, sized by var.state_volume_size.
# The control node's cloud-init bootcmd derives the filesystem label from this
# resource's description, so changing its first word changes the label that
# the LABEL=state mount entry must match.
resource "openstack_blockstorage_volume_v3" "state" {
name = "${var.cluster_name}-state"
description = "State for control node"
description = "State for control node" # first word, lowercased, is used as the filesystem label
size = var.state_volume_size
}

# Volume holding home directories, sized by var.home_volume_size.
# The control node's cloud-init bootcmd derives the filesystem label from this
# resource's description, so changing its first word changes the label that
# the LABEL=home mount entry must match.
resource "openstack_blockstorage_volume_v3" "home" {
name = "${var.cluster_name}-home"
description = "Home for control node"
description = "Home for control node" # first word, lowercased, is used as the filesystem label
size = var.home_volume_size
}

0 comments on commit 1e68a91

Please sign in to comment.