Skip to content

Commit

Permalink
Ubuntu 20.04 support (#1888)
Browse files Browse the repository at this point in the history
* domain join

* InfluxDB

* Grafana

* cyclecloud

* OpenPBS

* openpbs

* fix hook and scalelib install

* wrong venv location

* remove autocomplete as it breaks pbsdataservice start

* OpenPBS client

* PAM auth

* Open OnDemand

* ood-applications

* support for pbs 22

* telegraf

* Chrony

* tests with ubuntu

* pipelines for Ubuntu

* private DNS zone

* don't use cvmfs on ubuntu

* use pwauth for auth

* pwauth fix for ubuntu

* enable authnz_external module

* force apache conf file generation

* installing required packages for a2enmod

* fix init hpc test user home directory

* fix genssh

* update UI tests

* fix shell UI test

* move alma pwauth install in role

* unique peering name deployment

* NFS Mount outside of Domain Join

* slurm server

* installing certs in ubuntu

* introducing changes to create ssl certificate in public ip scenario

* introducing cleaner method to create ssl certificate in public ip scenario

* fixing bug that causes ood playbook failure with basic auth

* use package for installing pyxis pre-reqs

* some minor fix for azslurm

* slurm client

* revert back the ood install command

* fix installation of azure-slurm

* certificate install by cycle

* Update slurmdbd service to start after the home directory is mounted

* move on 23.02.5

---------

Co-authored-by: egmsft <[email protected]>
  • Loading branch information
xpillons and egmsft authored Apr 19, 2024
1 parent d27bd64 commit 0acf6e7
Show file tree
Hide file tree
Showing 97 changed files with 1,035 additions and 493 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/all_bicep.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
home_type: ['azurefiles', 'anf']
user_auth: ['local', 'ad']
monitoring: ['none', 'grafana']
infra_os: ['almalinux']
infra_os: ['ubuntu', 'almalinux']
uses: ./.github/workflows/z_base_callable.yml
with:
resource_group: 'AUTO_GENERATED'
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/all_manual.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ on:
required: false
default: "['grafana']" # use it with ${{ github.event.inputs.monitoring }}
infra_os:
description: "Infrastructure OS - ['centos', 'almalinux'] - Default to ['centos']"
description: "Infrastructure OS - ['centos', 'almalinux', 'ubuntu'] - Default to ['almalinux']"
required: false
default: "['almalinux']" # use it with ${{ github.event.inputs.infra_os }}
deploy_with:
Expand Down
10 changes: 3 additions & 7 deletions .github/workflows/configs/almalinux.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
linux_base_image: almalinux:almalinux-x86_64:8_7-gen2:latest # almalinux:almalinux-hpc:8_7-hpc-gen2:latest # publisher:offer:sku:version or image_id
# linux image plan if required, format is publisher:product:name
#linux_base_plan: almalinux:almalinux-hpc:8_7-hpc-gen2
# Enable cvmfs-eessi - disabled by default
cvmfs_eessi:
enabled: true

queues:
- name: htc
Expand Down Expand Up @@ -44,13 +47,6 @@ queues:
ColocateNodes: true
EnableAcceleratedNetworking: true

- name: hbv3u20
vm_size: Standard_HB120rs_v3
max_count: 10
image: azhpc:azhop-compute:ubuntu-20_04:latest
ColocateNodes: true
EnableAcceleratedNetworking: true

# Remote Viz Queues
- name: viz3d
type: remoteviz
Expand Down
4 changes: 0 additions & 4 deletions .github/workflows/configs/base.yml
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,6 @@ usergroups:
gid: 5002
description: "For users with sudo right or local admin right on nodes"

# Enable cvmfs-eessi - disabled by default
cvmfs_eessi:
enabled: true

enroot:
enroot_version: 3.4.1

Expand Down
15 changes: 0 additions & 15 deletions .github/workflows/configs/centos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,6 @@ queues:
ColocateNodes: true
EnableAcceleratedNetworking: true

- name: hbv3al
vm_size: Standard_HB120rs_v3
max_count: 10
image: azhpc:azhop-compute:almalinux-8_7:latest
__ALMA8_PLAN__
ColocateNodes: true
EnableAcceleratedNetworking: true

- name: hbv3u20
vm_size: Standard_HB120rs_v3
max_count: 10
image: azhpc:azhop-compute:ubuntu-20_04:latest
ColocateNodes: true
EnableAcceleratedNetworking: true

# Remote Viz Queues
- name: viz3d
type: remoteviz
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/configs/slurm_3.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ slurm:
# Enable SLURM accounting, this will create a SLURM accounting database in a managed MySQL server instance
accounting_enabled: true
# SLURM version to install.
slurm_version: 22.05.9
slurm_version: 23.02.5  # 22.05.9 makes slurmdbd crash with a core dump
cyclecloud_slurm_version: 3.0.4
82 changes: 82 additions & 0 deletions .github/workflows/configs/ubuntu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Ubuntu 20.04 LTS (focal, gen2) image reference.
# Accepted forms: publisher:offer:sku:version or image_id
linux_base_image: 'canonical:0001-com-ubuntu-server-focal:20_04-lts-gen2:latest'
# linux image plan if required, format is publisher:product:name
# Example below is for an AlmaLinux HPC image; left commented out here.
#linux_base_plan: almalinux:almalinux-hpc:8_7-hpc-gen2

# cvmfs-eessi is kept disabled in this Ubuntu configuration
cvmfs_eessi:
  enabled: false

# Every queue below uses the ubuntu-20_04 compute or desktop image.
queues:
  - name: htc
    vm_size: Standard_F2s_v2
    max_count: 256
    image: 'azhpc:azhop-compute:ubuntu-20_04:latest'
    ColocateNodes: false

  - name: hc44rs
    vm_size: Standard_HC44rs
    max_count: 10
    image: 'azhpc:azhop-compute:ubuntu-20_04:latest'
    ColocateNodes: true
    EnableAcceleratedNetworking: true

  - name: hb60rs
    vm_size: Standard_HB60rs
    max_count: 10
    image: 'azhpc:azhop-compute:ubuntu-20_04:latest'
    ColocateNodes: true
    EnableAcceleratedNetworking: true

  - name: hb120v2
    vm_size: Standard_HB120rs_v2
    max_count: 10
    image: 'azhpc:azhop-compute:ubuntu-20_04:latest'
    ColocateNodes: true
    EnableAcceleratedNetworking: true

  - name: hpc
    vm_size: Standard_HB120rs_v3
    max_count: 10
    image: 'azhpc:azhop-compute:ubuntu-20_04:latest'
    ColocateNodes: true
    EnableAcceleratedNetworking: true

  - name: gpu
    vm_size: Standard_NC24ads_A100_v4
    max_count: 4
    image: 'azhpc:azhop-compute:ubuntu-20_04:latest'
    ColocateNodes: true
    EnableAcceleratedNetworking: true

  # Remote Viz Queues (type: remoteviz, desktop image)
  - name: viz3d
    type: remoteviz
    description: "With GPU - Small GPU node for single session"
    vm_size: Standard_NV12s_v3
    max_count: 4
    image: 'azhpc:azhop-desktop:ubuntu-20_04:latest'
    ColocateNodes: false
    EnableAcceleratedNetworking: true

  - name: viz
    type: remoteviz
    description: "Without GPU - for single session"
    vm_size: Standard_D8s_v5
    max_count: 10
    image: 'azhpc:azhop-desktop:ubuntu-20_04:latest'
    ColocateNodes: false
    EnableAcceleratedNetworking: true
    max_hours: 12  # Maximum session duration
    min_hours: 1  # Minimum session duration - 0 is infinite

  - name: largeviz3d
    type: remoteviz
    description: "Large With GPU - Intended for shared sessions"
    shareable: true
    vm_size: Standard_NV48s_v3
    max_count: 4
    image: 'azhpc:azhop-desktop:ubuntu-20_04:latest'
    ColocateNodes: false
    EnableAcceleratedNetworking: true
    max_hours: 12  # Maximum session duration
    min_hours: 1  # Minimum session duration - 0 is infinite
2 changes: 1 addition & 1 deletion .github/workflows/z_base_callable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ on:
default: 'true' # use it with ${{ inputs.clean }}
type: string
infra_os:
description: 'Operating System for the infrastructure: centos/almalinux - Default to centos'
description: 'Operating System for the infrastructure: centos/almalinux/ubuntu - Default to almalinux'
required: false
default: 'almalinux' # use it with ${{ inputs.infra_os }}
type: string
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/z_create_config_callable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ on:
default: 'grafana' # use it with ${{ inputs.monitoring }}
type: string
infra_os:
description: 'Operating System for the infrastructure: centos/almalinux - Default to centos'
description: 'Operating System for the infrastructure: centos/almalinux/ubuntu - Default to almalinux'
required: false
default: 'almalinux' # use it with ${{ inputs.infra_os }}
type: string
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/z_uitesting_callable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/[email protected]
with:
node-version: '14.x'
node-version: '16.x'
- name: Install dependencies
run: |
apt-get update
Expand Down
2 changes: 1 addition & 1 deletion bicep/mainTemplate.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ module azhopDeployment './azhop.bicep' = {

var vnetPeerings = contains(azhopConfig.network, 'peering') ? azhopConfig.network.peering : []
module azhopPeerings './vnetpeering.bicep' = [ for peer in vnetPeerings: {
name: 'peer_from${peer.vnet_name}'
name: 'peer_from${peer.vnet_name}_${guid(azhopResourceGroup.id)}'
scope: resourceGroup(peer.vnet_resource_group)
params: {
name: '${azhopConfig.resource_group}_${azhopConfig.network.vnet.name}'
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"devDependencies": {
"@playwright/test": "^1.38.0"
"@playwright/test": "^1.42.1"
},
"dependencies": {
"js-yaml": "^4.1.0"
Expand Down
8 changes: 0 additions & 8 deletions playbooks/ccportal.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,3 @@
install_cyclecloud: '{{not "azurecyclecloud" in (cyclecloud.image | default(""))}}' # don't install cyclecloud when using the azurecyclecloud marketplace image
cc_webserverpath: '{{cyclecloud.web_server_path | default("")}}'
cycle_distribution_method: '{{cyclecloud.distribution_method | default("azhop")}}'

- name: Update Packages
include_role:
name: pkg_update
apply:
become: true
vars:
packages_to_exclude_from_upgrade: "cyclecloud*"
2 changes: 1 addition & 1 deletion playbooks/chrony.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

- hosts: jumpbox, ondemand, scheduler, grafana, ccportal
become: true
gather_facts: no
gather_facts: yes
vars_files:
- '{{global_config_file}}'

Expand Down
2 changes: 1 addition & 1 deletion playbooks/grafana.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,4 @@
apply:
become: true
vars:
packages_to_exclude_from_upgrade: "grafana*"
packages_to_exclude_from_upgrade: ["grafana"]
109 changes: 57 additions & 52 deletions playbooks/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
include_role:
name: pkg_update


- name: Join AD domain or create local users, mount shared home
hosts: scheduler, ondemand, grafana
become: true
Expand All @@ -40,56 +39,54 @@
- name: Gather facts for first time
setup:

- name: Install dependencies
yum:
name: epel-release, nfs-utils, python3
lock_timeout : 180
state: present

# use the private DNS when defined
- name: setup private domain if not set in resolv.conf
shell: |
if ! grep -q {{private_dns.name}} /etc/resolv.conf; then
sed -i 's/search /search {{private_dns.name}} /' /etc/resolv.conf
# Stop NetworkManager overwriting /etc/resolv.conf
cat > /etc/NetworkManager/conf.d/90-dns-none.conf << EOF
[main]
dns=none
EOF
fi
- name: setup private domain
block:
- name: setup private domain if not set in resolv.conf (for CentOS and AlmaLinux)
shell: |
if ! grep -q {{private_dns.name}} /etc/resolv.conf; then
sed -i 's/search /search {{private_dns.name}} /' /etc/resolv.conf
# Stop NetworkManager overwriting /etc/resolv.conf
cat > /etc/NetworkManager/conf.d/90-dns-none.conf << EOF
[main]
dns=none
EOF
fi
when: ansible_distribution in ['CentOS', 'AlmaLinux']

- name: setup private domain if not set in resolv.conf (for Ubuntu)
shell: |
if ! grep -q {{private_dns.name}} /etc/resolv.conf; then
sed -i 's/search /search {{private_dns.name}} /' /etc/resolv.conf
fi
when: ansible_distribution in ['Ubuntu']
when:
- private_dns.create | default(false)

- name: mount home
block:
- name: debug
debug:
msg:
- '{{ anf_home_ip }}:/{{ anf_home_path }}'
- '{{ homedir_mountpoint }}'
- '{{ anf_home_opts }}'
- name: Mount home NFS volume
mount:
src: '{{ anf_home_ip }}:/{{ anf_home_path }}'
path: '{{ homedir_mountpoint }}'
opts: '{{ anf_home_opts }}'
state: mounted
fstype: nfs
register: mount_success
until: mount_success is succeeded
retries: 3
delay: 30

- name: Disable SELinux
selinux:
state: disabled
register: selinux

- name: reboot
reboot:
when: selinux.reboot_required

when: (authentication.user_auth | default('ad')) == "local"
- name: debug
debug:
msg:
- '{{ anf_home_ip }}:/{{ anf_home_path }}'
- '{{ homedir_mountpoint }}'
- '{{ anf_home_opts }}'

- name: Mount home NFS volume
include_role:
name: nfs_mount
vars:
nfs_source: '{{ anf_home_ip }}:/{{ anf_home_path }}'
nfs_path: '{{ homedir_mountpoint }}'
nfs_mount_opts: '{{ anf_home_opts }}'

- name: Disable SELinux
selinux:
state: disabled
register: selinux
when: ansible_distribution == 'CentOS' or ansible_distribution == 'AlmaLinux'

- name: reboot
reboot:
when: selinux.reboot_required | default(false)

- name: Domain Join
block:
Expand All @@ -108,10 +105,6 @@
domain_admin: "{{ad_join_user}}"
domain_password: "{{password.stdout}}"
domain: "{{domain_name}}"
domain_homedir: "{{homedir_mountpoint}}"
domain_mount_ip: "{{anf_home_ip}}"
domain_mount_path: "{{anf_home_path}}"
domain_mount_opts: "{{anf_home_opts}}"
domain_join_ou: "{{domain.domain_join_ou | default('')}}"

when: (authentication.user_auth | default('ad')) == "ad"
Expand Down Expand Up @@ -142,6 +135,10 @@
shell: |
cat <<EOF >/etc/profile.d/gen_sshkey.sh
#!/bin/sh
# Only do this for uid >= 1000
if [ \$(id -u) -le 1000 ]; then
return
fi
if [ ! -f ~/.ssh/id_rsa.pub ] ; then
ssh-keygen -q -t rsa -N '' -f ~/.ssh/id_rsa <<<y >/dev/null 2>&1
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
Expand All @@ -150,4 +147,12 @@
EOF
chmod 644 /etc/profile.d/gen_sshkey.sh
args:
creates: /etc/profile.d/gen_sshkey.sh
creates: /etc/profile.d/gen_sshkey.sh

# Ubuntu only: make sure /etc/pam.d/common-session carries the pam_mkhomedir
# session line (skeleton taken from /etc/skel). The file is created if it does
# not exist yet; the task is skipped on every other distribution.
- name: add pam_mkhomedir to pam to create users home directory if not exist (Ubuntu only)
  when: ansible_distribution == 'Ubuntu'
  lineinfile:
    path: /etc/pam.d/common-session
    create: true
    state: present
    line: 'session required pam_mkhomedir.so skel=/etc/skel'
Loading

0 comments on commit 0acf6e7

Please sign in to comment.