Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include containers in fat image #349

Closed
wants to merge 10 commits into from
33 changes: 33 additions & 0 deletions ansible/cleanup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,38 @@
path: "/home/{{ ansible_user }}/.ssh/"
state: absent

# A podman pause process is started by `podman pull`, and its pidfile causes the error
#   Error: cannot re-exec process to join the existing user namespace
# in podman commands in the booted image.
# Later podman versions may clean up the pidfile automatically.
# NB: This hard-codes the default config that only the `podman` user (=1001) runs containers.
# List the podman user's containers; read-only, so never reported as changed.
- name: Check for running podman containers
  become_user: podman
  command:
    cmd: podman ps
  changed_when: false
  register: podman_ps

# Abort if `podman ps` printed anything beyond a single line.
- name: Ensure no containers are running
  assert:
    that:
      - podman_ps.stdout_lines | length == 1  # header only
    fail_msg: "podman user has running containers:\n{{ podman_ps.stdout }}"

# Read the pause process PID from its pidfile; read-only, so never reported as changed.
- name: Get PID of podman pause process
  command:
    cmd: cat /tmp/podman-run-1001/libpod/tmp/pause.pid
  changed_when: false
  register: podman_pause_pidfile

# Terminate the pause process using the PID read from the pidfile.
- name: Kill pause process
  become_user: podman
  command:
    cmd: "kill {{ podman_pause_pidfile.stdout }}"

# Delete the pidfile itself so it is not left behind.
- name: Remove pause pidfile
  file:
    state: absent
    path: /tmp/podman-run-1001/libpod/tmp/pause.pid

- name: Run cloud-init cleanup
command: cloud-init clean --logs --seed
12 changes: 8 additions & 4 deletions ansible/fatimage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,11 @@
become: yes
gather_facts: no
tasks:
# - import_playbook: slurm.yml
# - import_playbook: slurm.yml:
- name: Setup DB
include_role:
name: mysql
tasks_from: install.yml
- name: OpenHPC
import_role:
name: stackhpc.openhpc
Expand All @@ -77,10 +81,10 @@
name: opensearch
tasks_from: install.yml
become: true

# opensearch - containerised, nothing to do
# slurm_stats - nothing to do
# filebeat - containerised - nothing to do
- import_role:
name: filebeat
tasks_from: install.yml

- import_role:
# can't only run cloudalchemy.node_exporter/tasks/install.yml as needs vars from preflight.yml and triggers service start
Expand Down
9 changes: 1 addition & 8 deletions ansible/monitoring.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,12 @@
# Collection currently requires root for all tasks.
become: true

- name: Setup filebeat
- name: Deploy filebeat
hosts: filebeat
tags: filebeat
tasks:
- import_role:
name: filebeat
tasks_from: config.yml
tags: config

- import_role:
name: filebeat
tasks_from: deploy.yml
tags: deploy

- name: Deploy node_exporter
hosts: node_exporter
Expand Down
7 changes: 0 additions & 7 deletions ansible/roles/filebeat/tasks/deploy.yml

This file was deleted.

17 changes: 17 additions & 0 deletions ansible/roles/filebeat/tasks/install.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
# Install-time tasks for filebeat: write the systemd unit, pull the container
# image, and reload systemd if the unit file changed.

- name: Create systemd unit file
  template:
    dest: /etc/systemd/system/filebeat.service
    src: filebeat.service.j2
  become: true
  register: _filebeat_unit

- name: Pull container image
  containers.podman.podman_image:
    name: "docker.elastic.co/beats/filebeat-oss"
    tag: "{{ filebeat_version }}"
  # become_user does not enable privilege escalation by itself, so set become
  # explicitly instead of relying on the calling play.
  become: true
  become_user: "{{ filebeat_podman_user }}"

- name: Reload filebeat unit file
  command: systemctl daemon-reload
  # Escalate explicitly, matching the unit-file task above, so this also works
  # when the calling play does not set become.
  become: true
  when: _filebeat_unit.changed
2 changes: 2 additions & 0 deletions ansible/roles/filebeat/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Role entry point: statically import install.yml, then runtime.yml.
- name: Import filebeat install tasks
  import_tasks: install.yml

- name: Import filebeat runtime tasks
  import_tasks: runtime.yml
1 change: 0 additions & 1 deletion ansible/roles/filebeat/tasks/post.yml

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,13 @@
mode: 0600
notify: Restart filebeat container
become: true

# Run any handlers notified so far before the service state is enforced below.
- name: Flush handlers
  meta: flush_handlers

# Make sure the filebeat unit is running and enabled.
- name: Ensure filebeat service state
  become: true
  systemd:
    name: filebeat.service
    enabled: true
    state: started
6 changes: 6 additions & 0 deletions ansible/roles/mysql/tasks/install.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,9 @@
dest: /etc/systemd/system/mysql.service
src: mysql.service.j2
register: _mysql_unitfile

# Pull the mysql image at the configured tag.
# NOTE(review): become_user only takes effect when become is enabled by the
# caller - confirm every play using this file sets it.
- name: Pull container image
  become_user: "{{ mysql_podman_user }}"
  containers.podman.podman_image:
    name: mysql
    tag: "{{ mysql_tag }}"
6 changes: 6 additions & 0 deletions ansible/roles/opensearch/tasks/install.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@
src: opensearch.service.j2
register: _opensearch_unit

# Pull the opensearch image at the configured version.
# NOTE(review): become_user only takes effect when become is enabled by the
# caller - confirm every play using this file sets it.
- name: Pull container image
  become_user: "{{ opensearch_podman_user }}"
  containers.podman.podman_image:
    name: opensearchproject/opensearch
    tag: "{{ opensearch_version }}"

# Only reload systemd when the unit file task reported a change.
- name: Reload opensearch unit file
  when: _opensearch_unit.changed
  command: systemctl daemon-reload
5 changes: 0 additions & 5 deletions ansible/roles/opensearch/tasks/runtime.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,6 @@
notify: Restart opensearch service
become: true

- name: Pull container
containers.podman.podman_image:
name: "opensearchproject/opensearch:{{ opensearch_version }}"
become_user: "{{ opensearch_podman_user }}"

- name: Flush handlers
meta: flush_handlers

Expand Down
5 changes: 2 additions & 3 deletions environments/.stackhpc/ARCUS.pkrvars.hcl
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
flavor = "vm.ska.cpu.general.small"
use_blockstorage_volume = true
volume_size = 10 # GB
volume_size = 12 # GB. Compatible with SMS-lab's general.v1.tiny
image_disk_format = "qcow2"
networks = ["4b6b2722-ee5b-40ec-8e52-a6610e14cc51"] # portal-internal (DNS broken on ilab-60)
source_image_name = "openhpc-230804-1754-80b8d714" # https://github.com/stackhpc/ansible-slurm-appliance/pull/298
fatimage_source_image_name = "Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qcow2"
ssh_keypair_name = "slurm-app-ci"
ssh_private_key_file = "~/.ssh/id_rsa"
security_groups = ["default", "SSH"]
ssh_bastion_host = "128.232.222.183"
ssh_bastion_username = "slurm-app-ci"
floating_ip_network = "CUDN-Internet" # Use FIP to avoid docker ratelimits on portal-internal outbound IP
1 change: 1 addition & 0 deletions environments/.stackhpc/inventory/group_vars/builder.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#update_enable: false # Uncomment to speed up debugging of build issues unrelated to updates
2 changes: 1 addition & 1 deletion environments/.stackhpc/terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ variable "cluster_name" {
variable "cluster_image" {
description = "single image for all cluster nodes - a convenience for CI"
type = string
default = "openhpc-240102-1025-e533fd70" # https://github.com/stackhpc/ansible-slurm-appliance/pull/346
default = "openhpc-240112-1705-1f2656f5" # https://github.com/stackhpc/ansible-slurm-appliance/pull/349
# default = "Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qcow2"
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# NOTE: Might be better off as extra vars or in a builder-specific inventory, as
# this is dependent on alphabetical ordering of groups, so if these variables are
# defined elsewhere the group that is ordered lower will determine the values.
update_enable: true
openhpc_slurm_service_started: false
nfs_client_mnt_state: present
block_devices_partition_state: skip
Expand Down
2 changes: 1 addition & 1 deletion packer/openhpc_extravars.yml
Original file line number Diff line number Diff line change
@@ -1 +1 @@
update_enable: true
workaround_ansible_issue_61497: yes # extravars files can't be empty
4 changes: 2 additions & 2 deletions packer/openstack.pkr.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,12 @@ variable "image_visibility" {

variable "ssh_bastion_host" {
type = string
default = ""
default = null
}

variable "ssh_bastion_username" {
type = string
default = ""
default = null
}

variable "ssh_bastion_private_key_file" {
Expand Down
Loading