# Skip to content
#
# Create CentOS Stream 9 AMI with NVIDIA Driver #6
#
# Create CentOS Stream 9 AMI with NVIDIA Driver
#
# Create CentOS Stream 9 AMI with NVIDIA Driver #6

# Builds a CentOS Stream 9 AMI with the NVIDIA driver installed.
#
# Flow: launch a g4dn.xlarge spot instance from the base AMI, install the
# driver through SSM Run Command, poll until nvidia-smi succeeds, create an
# AMI from the instance, then terminate the instance.
#
# Job outputs: ami_id (the new image) and instance_id (the build instance).
name: Create CentOS Stream 9 AMI with NVIDIA Driver

on:
  workflow_dispatch:

jobs:
  create-ami:
    runs-on: ubuntu-latest
    outputs:
      ami_id: ${{ steps.create_ami.outputs.ami_id }}
      instance_id: ${{ steps.launch_instance.outputs.instance_id }}
    steps:
      - name: Set up AWS credentials
        uses: aws-actions/configure-aws-credentials@v2
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      - name: Launch EC2 instance
        id: launch_instance
        env:
          # Passed through the environment so the values are quoted shell
          # variables rather than text spliced into the script.
          KEY_NAME: ${{ secrets.AWS_KEY_NAME }}
          SECURITY_GROUP_ID: ${{ secrets.AWS_SECURITY_GROUP_ID }}
        run: |
          # NOTE(review): no IAM instance profile is attached here, yet the
          # following steps depend on SSM Run Command reaching the instance.
          # Confirm SSM access is granted some other way.
          INSTANCE_ID=$(aws ec2 run-instances \
            --image-id 'ami-01529018e3919dace' \
            --instance-type g4dn.xlarge \
            --key-name "$KEY_NAME" \
            --security-group-ids "$SECURITY_GROUP_ID" \
            --associate-public-ip-address \
            --instance-market-options 'MarketType=spot' \
            --query 'Instances[0].InstanceId' \
            --output text)
          # The ::set-output workflow command is deprecated; step outputs are
          # written to the $GITHUB_OUTPUT file instead.
          echo "instance_id=$INSTANCE_ID" >> "$GITHUB_OUTPUT"

      - name: Install NVIDIA Driver
        env:
          INSTANCE_ID: ${{ steps.launch_instance.outputs.instance_id }}
        run: |
          # A freshly launched instance is not yet known to SSM. A command
          # sent with --targets before the agent registers matches nothing,
          # so wait for the instance and then for the agent to report Online.
          aws ec2 wait instance-status-ok --instance-ids "$INSTANCE_ID"
          SSM_DEADLINE=$(( $(date +%s) + 10 * 60 ))
          while true; do
            PING_STATUS=$(aws ssm describe-instance-information \
              --filters "Key=InstanceIds,Values=$INSTANCE_ID" \
              --query 'InstanceInformationList[0].PingStatus' \
              --output text) || PING_STATUS=""
            if [ "$PING_STATUS" = "Online" ]; then
              break
            fi
            if [ "$(date +%s)" -ge "$SSM_DEADLINE" ]; then
              echo "Timeout: instance did not register with SSM within 10 minutes."
              exit 1
            fi
            echo "Waiting for SSM agent (status: $PING_STATUS)..."
            sleep 10
          done
          # The commands run on the instance as one script. The
          # dnf-command(config-manager) argument is quoted (JSON-escaped)
          # because bare parentheses are a shell syntax error remotely.
          aws ssm send-command \
            --document-name "AWS-RunShellScript" \
            --targets "Key=instanceids,Values=$INSTANCE_ID" \
            --parameters '{"commands":["sudo dnf update -y",
              "sudo dnf install -y \"dnf-command(config-manager)\"",
              "sudo dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm",
              "sudo dnf install -y https://dl.fedoraproject.org/pub/epel/epel-next-release-latest-9.noarch.rpm",
              "KERNEL_VERSION=$(uname -r)",
              "sudo dnf install -y kernel-devel-$KERNEL_VERSION kernel-headers-$KERNEL_VERSION dkms",
              "sudo dnf config-manager --add-repo=https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo",
              "sudo dnf install -y nvidia-driver nvidia-driver-cuda",
              "sudo reboot"]}' \
            --comment "Install NVIDIA driver on CentOS Stream 9"

      - name: Wait for instance to be ready and check GPU
        env:
          INSTANCE_ID: ${{ steps.launch_instance.outputs.instance_id }}
        run: |
          aws ec2 wait instance-status-ok --instance-ids "$INSTANCE_ID"
          START_TIME=$(date +%s)
          TIMEOUT=$((30 * 60))  # 30 minutes in seconds
          # Poll nvidia-smi through SSM until it succeeds or the timeout hits.
          # The step shell runs with errexit, so every AWS call that is
          # expected to fail while the driver installs or the instance
          # reboots is guarded; otherwise the first failure ends the step
          # instead of retrying.
          while true; do
            ELAPSED_TIME=$(( $(date +%s) - START_TIME ))
            if [ "$ELAPSED_TIME" -ge "$TIMEOUT" ]; then
              echo "Timeout: GPU not detected within 30 minutes."
              exit 1
            fi
            STATUS=""
            SMI_OUTPUT=""
            COMMAND_ID=$(aws ssm send-command \
              --document-name "AWS-RunShellScript" \
              --targets "Key=instanceids,Values=$INSTANCE_ID" \
              --parameters '{"commands":["nvidia-smi"]}' \
              --query "Command.CommandId" --output text) || COMMAND_ID=""
            if [ -n "$COMMAND_ID" ]; then
              # The waiter exits non-zero when the remote command fails
              # (e.g. nvidia-smi not installed yet); that is a retry case.
              aws ssm wait command-executed \
                --instance-id "$INSTANCE_ID" \
                --command-id "$COMMAND_ID" || true
              STATUS=$(aws ssm get-command-invocation \
                --command-id "$COMMAND_ID" \
                --instance-id "$INSTANCE_ID" \
                --query "Status" \
                --output text) || STATUS=""
              SMI_OUTPUT=$(aws ssm get-command-invocation \
                --command-id "$COMMAND_ID" \
                --instance-id "$INSTANCE_ID" \
                --query "StandardOutputContent" \
                --output text) || SMI_OUTPUT=""
            fi
            echo "$SMI_OUTPUT"
            # Require a successful exit as well as the banner: nvidia-smi's
            # own failure message also contains the text "NVIDIA-SMI".
            if [ "$STATUS" = "Success" ] && echo "$SMI_OUTPUT" | grep -q "NVIDIA-SMI"; then
              echo "GPU detected successfully!"
              break
            else
              echo "No GPU detected yet, retrying in 10 seconds..."
              sleep 10
            fi
          done

      - name: Create AMI
        id: create_ami
        env:
          INSTANCE_ID: ${{ steps.launch_instance.outputs.instance_id }}
        run: |
          AMI_ID=$(aws ec2 create-image \
            --instance-id "$INSTANCE_ID" \
            --name "CentOS-Stream9-NVIDIA-$(date +'%Y-%m-%d')" \
            --description "CentOS Stream 9 with NVIDIA Driver" \
            --no-reboot \
            --query 'ImageId' \
            --output text)
          echo "ami_id=$AMI_ID" >> "$GITHUB_OUTPUT"
          echo "Created AMI: $AMI_ID"

      - name: Terminate EC2 instance
        # Run even when an earlier step failed so a failed build does not
        # leave the GPU instance running; skip only if none was launched.
        if: ${{ always() && steps.launch_instance.outputs.instance_id != '' }}
        env:
          INSTANCE_ID: ${{ steps.launch_instance.outputs.instance_id }}
        run: |
          aws ec2 terminate-instances --instance-ids "$INSTANCE_ID"
          aws ec2 wait instance-terminated --instance-ids "$INSTANCE_ID"
          echo "Terminated EC2 instance: $INSTANCE_ID"

      - name: Output AMI ID
        env:
          AMI_ID: ${{ steps.create_ami.outputs.ami_id }}
        run: |
          echo "New AMI created with ID: $AMI_ID"