# Create CentOS Stream 9 AMI with NVIDIA Driver #6
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Manually triggered workflow that builds a GPU-ready AMI:
#   1. launches a g4dn.xlarge spot instance from a fixed base image,
#   2. installs the NVIDIA driver on it through SSM Run Command,
#   3. polls `nvidia-smi` until the driver reports a GPU (30 minute limit),
#   4. snapshots the instance into an AMI, and
#   5. terminates the instance (even when an earlier step failed).
name: Create CentOS Stream 9 AMI with NVIDIA Driver

on:
  workflow_dispatch:

jobs:
  create-ami:
    runs-on: ubuntu-latest
    outputs:
      ami_id: ${{ steps.create_ami.outputs.ami_id }}
      instance_id: ${{ steps.launch_instance.outputs.instance_id }}
    steps:
      - name: Set up AWS credentials
        uses: aws-actions/configure-aws-credentials@v2
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      # NOTE(review): the later steps drive the instance through SSM, which
      # needs the instance to be registered with Systems Manager. No IAM
      # instance profile is attached here -- confirm the account/base image
      # provides one.
      - name: Launch EC2 instance
        id: launch_instance
        run: |
          INSTANCE_ID=$(aws ec2 run-instances \
            --image-id 'ami-01529018e3919dace' \
            --instance-type g4dn.xlarge \
            --key-name "${{ secrets.AWS_KEY_NAME }}" \
            --security-group-ids "${{ secrets.AWS_SECURITY_GROUP_ID }}" \
            --associate-public-ip-address \
            --instance-market-options 'MarketType=spot' \
            --query 'Instances[0].InstanceId' \
            --output text)
          # `::set-output` is deprecated; step outputs go through $GITHUB_OUTPUT.
          echo "instance_id=$INSTANCE_ID" >> "$GITHUB_OUTPUT"

      - name: Install NVIDIA Driver
        run: |
          INSTANCE_ID="${{ steps.launch_instance.outputs.instance_id }}"
          # The instance was only just requested; wait until it passes its
          # status checks before sending it any command.
          aws ec2 wait instance-status-ok --instance-ids "$INSTANCE_ID"
          # The JSON is single-quoted so $(uname -r) and $KERNEL_VERSION are
          # expanded on the instance, not on the runner. The package name
          # containing parentheses is quoted for the remote shell.
          aws ssm send-command \
            --document-name "AWS-RunShellScript" \
            --targets "Key=instanceids,Values=$INSTANCE_ID" \
            --parameters '{"commands":["sudo dnf update -y",
              "sudo dnf install -y \"dnf-command(config-manager)\"",
              "sudo dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm",
              "sudo dnf install -y https://dl.fedoraproject.org/pub/epel/epel-next-release-latest-9.noarch.rpm",
              "KERNEL_VERSION=$(uname -r)",
              "sudo dnf install -y kernel-devel-$KERNEL_VERSION kernel-headers-$KERNEL_VERSION dkms",
              "sudo dnf config-manager --add-repo=https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo",
              "sudo dnf install -y nvidia-driver nvidia-driver-cuda",
              "sudo reboot"]}' \
            --comment "Install NVIDIA driver on CentOS Stream 9"

      - name: Wait for instance to be ready and check GPU
        run: |
          INSTANCE_ID="${{ steps.launch_instance.outputs.instance_id }}"
          aws ec2 wait instance-status-ok --instance-ids "$INSTANCE_ID"

          # Set the start time
          START_TIME=$(date +%s)
          TIMEOUT=$((30 * 60))  # 30 minutes in seconds

          # Loop until GPU is detected or timeout is reached
          while true; do
            # Check elapsed time
            ELAPSED_TIME=$(( $(date +%s) - START_TIME ))
            if [ "$ELAPSED_TIME" -ge "$TIMEOUT" ]; then
              echo "Timeout: GPU not detected within 30 minutes."
              exit 1
            fi

            # Run nvidia-smi command via SSM. The step shell runs with -e, so
            # every AWS call that may legitimately fail while the driver is
            # still installing (or the instance is rebooting) is guarded;
            # otherwise the first failure would abort the step instead of
            # retrying.
            if ! COMMAND_ID=$(aws ssm send-command \
              --document-name "AWS-RunShellScript" \
              --targets "Key=instanceids,Values=$INSTANCE_ID" \
              --parameters '{"commands":["nvidia-smi"]}' \
              --query "Command.CommandId" --output text); then
              echo "Could not send command yet, retrying in 10 seconds..."
              sleep 10
              continue
            fi

            # Wait for the command to complete. The waiter exits non-zero
            # when the remote command fails (e.g. nvidia-smi not installed
            # yet), which is an expected "not ready" outcome here.
            aws ssm wait command-executed \
              --instance-id "$INSTANCE_ID" \
              --command-id "$COMMAND_ID" || true

            # Retrieve the output from SSM
            SMI_OUTPUT=$(aws ssm get-command-invocation \
              --command-id "$COMMAND_ID" \
              --instance-id "$INSTANCE_ID" \
              --query "StandardOutputContent" \
              --output text) || SMI_OUTPUT=""
            echo "$SMI_OUTPUT"

            # Check if GPU is detected
            if echo "$SMI_OUTPUT" | grep -q "NVIDIA-SMI"; then
              echo "GPU detected successfully!"
              break
            else
              echo "No GPU detected yet, retrying in 10 seconds..."
              sleep 10
            fi
          done

      - name: Create AMI
        id: create_ami
        run: |
          AMI_ID=$(aws ec2 create-image \
            --instance-id "${{ steps.launch_instance.outputs.instance_id }}" \
            --name "CentOS-Stream9-NVIDIA-$(date +'%Y-%m-%d')" \
            --description "CentOS Stream 9 with NVIDIA Driver" \
            --no-reboot \
            --query 'ImageId' \
            --output text)
          echo "ami_id=$AMI_ID" >> "$GITHUB_OUTPUT"
          echo "Created AMI: $AMI_ID"

          # create-image returns while the AMI is still pending. Wait for it
          # to become available before the next step terminates the source
          # instance. A single waiter call gives up after a bounded number of
          # polls, so retry a few times for slow snapshots.
          for attempt in 1 2 3; do
            if aws ec2 wait image-available --image-ids "$AMI_ID"; then
              break
            fi
            if [ "$attempt" -eq 3 ]; then
              echo "AMI $AMI_ID did not become available in time."
              exit 1
            fi
          done

      # Runs even when an earlier step failed so a launched GPU instance is
      # never left running; skipped only when no instance was launched.
      - name: Terminate EC2 instance
        if: always() && steps.launch_instance.outputs.instance_id != ''
        run: |
          INSTANCE_ID="${{ steps.launch_instance.outputs.instance_id }}"
          aws ec2 terminate-instances --instance-ids "$INSTANCE_ID"
          aws ec2 wait instance-terminated --instance-ids "$INSTANCE_ID"
          echo "Terminated EC2 instance: $INSTANCE_ID"

      - name: Output AMI ID
        run: |
          echo "New AMI created with ID: ${{ steps.create_ami.outputs.ami_id }}"