Skip to content

Commit

Permalink
30 feature update slurm version to 22052 (#33)
Browse files Browse the repository at this point in the history
* Update slurm from 21.08.8 to 22.05.2

Fix up some issues found during testing.

Use the correct Rocky Linux AMIs.

Fix an SELinux issue with Alma Linux.

* Add extra 5 GB to root volume of compute node AMIs so they don't fill up

AMI creation failed because the root volume filled up.
The Alma Linux 8 arm64 AMI only had a 4 GB root volume and it filled
up when installing packages.
  • Loading branch information
cartalla authored Jun 22, 2022
1 parent 48fa695 commit 57da982
Show file tree
Hide file tree
Showing 8 changed files with 197 additions and 157 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@

.mkdocs_venv/
site/
.vscode/

# Jekyll
Gemfile.lock
.jekyll-cache
Expand Down
17 changes: 12 additions & 5 deletions install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,24 @@ deactivate &> /dev/null || true

if ! yum list installed make &> /dev/null; then
echo -e "\nInstalling make"
sudo yum -y install make
if ! sudo yum -y install make; then
echo -e "\nwarning: Couldn't install make"
fi
fi

if ! yum list installed wget &> /dev/null; then
echo -e "\nInstalling wget"
sudo yum -y install wget
if ! sudo yum -y install wget; then
echo -e "\nwarning: Couldn't install wget"
fi
fi

if ! python3 --version &> /dev/null; then
echo -e "\nInstalling python3"
sudo yum -y install python3
if ! sudo yum -y install python3; then
echo -e "\nerror: Couldn't find python3 in the path or install it. This is required."
exit 1
fi
fi

# Check python version
Expand Down Expand Up @@ -71,8 +78,8 @@ if ! cdk --version &> /dev/null; then
fi
version=$(cdk --version | awk '{print $1}')
if [[ $version != $CDK_VERSION ]]; then
echo "Updating the local version of aws-cdk from version $version to $CDK_VERSION"
sudo npm update -g aws-cdk@$CDK_VERSION
echo "Updating the global version of aws-cdk from version $version to $CDK_VERSION"
sudo npm install -g aws-cdk@$CDK_VERSION
fi

# Create python virtual environment
Expand Down
8 changes: 5 additions & 3 deletions source/cdk/cdk_slurm_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -2188,10 +2188,13 @@ def create_slurm_node_ami(self):
logger.error(f"AmiMap doesn't have ImageId for {self.region}/{distribution}/{distribution_major_version}/{architecture}")
exit(1)
ami_info = ec2_client.describe_images(ImageIds=[ami_id])['Images'][0]
root_device_name = ami_info['RootDeviceName']
block_devices = []
root_device = True
for block_device_info in ami_info['BlockDeviceMappings']:
ami_volume_size = int(block_device_info['Ebs']['VolumeSize'])
device_name = block_device_info['DeviceName']
root_device = root_device_name == device_name
# Add 5GB to the AMI's root device so that there is room for installing packages
ami_volume_size = int(block_device_info['Ebs']['VolumeSize']) + 5
if root_device:
try:
volume_size = str(self.config['slurm']['SlurmNodeAmis']['BaseAmis'][self.region][distribution][distribution_major_version][architecture]['RootDeviceSize'])
Expand All @@ -2204,7 +2207,6 @@ def create_slurm_node_ami(self):
exit(1)
except KeyError:
volume_size = ami_volume_size
root_device = False
else:
volume_size = block_device_info['Ebs']['VolumeSize']
block_devices.append(
Expand Down
4 changes: 3 additions & 1 deletion source/cdk/config_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
from schema import Schema, And, Use, Optional, Regex, SchemaError
from sys import exit

DEFAULT_SLURM_VERSION = '22.05.2'

config = {}

# Determine all AWS regions available on the account. We do not display opt-out region
Expand Down Expand Up @@ -127,7 +129,7 @@
# SlurmVersion:
# Latest tested version
# Critical security fix released in 21.08.8. Must be that version or later.
Optional('SlurmVersion', default='21.08.8'): str,
Optional('SlurmVersion', default=DEFAULT_SLURM_VERSION): str,
#
# ClusterName:
# Default to the StackName
Expand Down
39 changes: 28 additions & 11 deletions source/create-ami-map.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
'''
import argparse
import boto3
import json
import logging
from pprint import PrettyPrinter

Expand All @@ -32,11 +33,13 @@

distributions_dict = {
'AlmaLinux': {
# x86_64: https://aws.amazon.com/marketplace/pp/prodview-mku4y3g4sjrye
# arm_64: https://aws.amazon.com/marketplace/pp/prodview-zgsymdwitnxmm
'owner': '679593333241',
'major_versions': ['8'],
'name_filter': 'AlmaLinux OS {distribution_major_version}*',
'product_codes': [
'be714bpjscoj5uvqz0of5mscl', # x86_64: https://aws.amazon.com/marketplace/pp/prodview-mku4y3g4sjrye
'6vto7uou7jjh4um1mwoy8ov0s', # arm_64: https://aws.amazon.com/marketplace/pp/prodview-zgsymdwitnxmm
]
},
'Amazon': {
'owner': '137112412989',
Expand All @@ -46,19 +49,20 @@
'CentOS': {
'owner': '125523088429',
'major_versions': ['7', '8'],
'name_filter': 'CentOS {distribution_major_version}*'
'name_filter': 'CentOS {distribution_major_version}*',
},
'RedHat': {
'owner': '309956199498',
'major_versions': ['7', '8'],
'name_filter': 'RHEL-{distribution_major_version}*'
},
'Rocky': {
# x86_64: https://aws.amazon.com/marketplace/pp/prodview-2otariyxb3mqu
# arm_64: https://aws.amazon.com/marketplace/pp/prodview-uzg6o44ep3ugw
'owner': '679593333241',
'major_versions': ['8'],
'name_filter': 'Rocky Linux {distribution_major_version}*'
'product_codes': [
'cotnnspjrsi38lfn8qo4ibnnm', # x86_64: https://aws.amazon.com/marketplace/pp/prodview-2otariyxb3mqu
'7tvwi95pv43herd5jg0bs6cu5', # arm_64: https://aws.amazon.com/marketplace/pp/prodview-uzg6o44ep3ugw
]
},
}

Expand Down Expand Up @@ -90,17 +94,30 @@ def main(filename, region, distribution):
for distribution_major_version in distributions_dict[distribution]['major_versions']:
logger.debug(f"distribution_major_version: {distribution_major_version}")
ami_map[region][distribution][distribution_major_version] = {}
name_filter = distributions_dict[distribution]['name_filter'].format(distribution_major_version=distribution_major_version)
logger.debug(f"name_filter: {name_filter}")
kwargs = {
'Owners': [distributions_dict[distribution]['owner']],
'Filters': [
{'Name': 'name', 'Values': [name_filter]},
{'Name': 'state', 'Values': ['available']}
]
}
if 'name_filter' in distributions_dict[distribution]:
name_filter = distributions_dict[distribution]['name_filter'].format(distribution_major_version=distribution_major_version)
logger.debug(f"name_filter: {name_filter}")
filter = {
'Name': 'name',
'Values': [name_filter]
}
kwargs['Filters'].append(filter)
if 'product_codes' in distributions_dict[distribution]:
product_codes = distributions_dict[distribution]['product_codes']
logger.debug(f'product_codes: {product_codes}')
filter = {
'Name': 'product-code',
'Values': product_codes
}
kwargs['Filters'].append(filter)
images = ec2_client.describe_images(**kwargs).get('Images', None)
logger.debug(f"Found {len(images)} images")
logger.debug(f"Found {len(images)} images:\n{json.dumps(images, indent=4)}")
for image in images:
if 'BETA' in image['Name']:
continue
Expand All @@ -117,7 +134,7 @@ def main(filename, region, distribution):
if not ami_map[region]:
del ami_map[region]

logger.debug(pp.pformat(ami_map))
logger.debug(f"ami_map:\n{json.dumps(ami_map, indent=4)}")

fh = open(filename, 'w')
print("AmiMap:", file=fh)
Expand Down
Loading

0 comments on commit 57da982

Please sign in to comment.