Skip to content

Commit

Permalink
updating AMD bootc image
Browse files Browse the repository at this point in the history
1) using the out-of-tree (OOT) driver and firmware instead of the in-tree ones
2) moving to DKMS and ROCm version 6.1.2

Signed-off-by: Yevgeny Shnaidman <[email protected]>
  • Loading branch information
yevgeny-shnaidman committed Aug 26, 2024
1 parent 87be4b4 commit 062841f
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 5 deletions.
40 changes: 38 additions & 2 deletions training/amd-bootc/Containerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,30 @@
# Image references, declared ahead of the first FROM so that FROM lines can use
# them. Every default can be overridden with --build-arg.

# InstructLab AMD image (its use is not visible in this part of the file).
ARG INSTRUCTLAB_IMAGE=quay.io/ai-lab/instructlab-amd:latest
# Bootable base OS image: the "sources" stage and the final stage start from it.
ARG BASEIMAGE=quay.io/centos-bootc/centos-bootc:stream9
# Image whose kernel build tree the "builder" stage compiles against.
# NOTE(review): the default points at the nvidia-builder image; confirm that is
# intended for the AMD build.
ARG DRIVER_TOOLKIT_IMAGE=quay.io/ai-lab/nvidia-builder:latest

# Stage "sources": install the amdgpu-dkms package on top of the base image so
# that other stages can copy the out-of-tree driver sources (/usr/src) and the
# firmware (/lib/firmware/updates/amdgpu) out of it.
FROM ${BASEIMAGE} AS sources
# Repository definition that makes amdgpu-dkms visible to dnf.
COPY amdgpu.repo-6.1.2 /etc/yum.repos.d/amdgpu-6.1.2.repo
# /etc/selinux is moved aside for the duration of the install and restored
# afterwards; presumably so that package scriptlets do not touch the SELinux
# policy during the build (TODO confirm).
# NOTE(review): the trailing `ln -s` only affects this stage's filesystem;
# confirm it is still needed here.
RUN mv /etc/selinux /etc/selinux.tmp \
    && dnf install -y amdgpu-dkms \
    && dnf clean all \
    && mv /etc/selinux.tmp /etc/selinux \
    && ln -s ../cloud-init.target /usr/lib/systemd/system/default.target.wants


# Stage "builder": compile the out-of-tree amdgpu kernel modules against the
# kernel build tree found under /usr/src/kernels in the driver-toolkit image.
FROM ${DRIVER_TOOLKIT_IMAGE} AS builder
USER root
# Driver sources taken from the "sources" stage. The directory name embeds the
# driver package version, so it must be kept in step with the amdgpu repo file.
COPY --from=sources /usr/src/amdgpu-6.7.0-1781449.el9 /amdgpu-drivers-source
WORKDIR /amdgpu-drivers-source
# The kernel version is read from /etc/driver-toolkit-release.sh: the text after
# '=' and between the double quotes. It is resolved inside the RUN so that no
# build argument is needed. The DKMS pre/post hooks shipped with the sources
# wrap the actual kbuild invocation.
RUN KERNEL_VERSION=$(cut -d'=' -f2 /etc/driver-toolkit-release.sh | cut -d'"' -f2) \
    && ./amd/dkms/pre-build.sh "${KERNEL_VERSION}" \
    && make TTM_NAME=amdttm SCHED_NAME=amd-sched -C "/usr/src/kernels/${KERNEL_VERSION}" M=/amdgpu-drivers-source \
    && ./amd/dkms/post-build.sh "${KERNEL_VERSION}"

# Define the base image for the final stage
FROM ${BASEIMAGE}
Expand All @@ -9,10 +33,22 @@ ARG VENDOR=''
# Publish the vendor under both the plain "vendor" key and the OCI-standard key.
LABEL vendor=${VENDOR} \
      org.opencontainers.image.vendor=${VENDOR}

# Install the out-of-tree amdgpu modules built in the "builder" stage.
#
# COPY expands wildcards only in its source paths, never in the destination, so
# a destination such as /lib/modules/*/... would create a directory literally
# named "*". The modules are therefore staged under /tmp/amdgpu-modules, keeping
# their relative layout, and moved into the real kernel directory by the RUN
# below once the kernel version is known.
COPY --from=builder /amdgpu-drivers-source/amd/amdgpu/amdgpu.ko /tmp/amdgpu-modules/amd/amdgpu/amdgpu.ko
COPY --from=builder /amdgpu-drivers-source/amd/amdkcl/amdkcl.ko /tmp/amdgpu-modules/amd/amdkcl/amdkcl.ko
COPY --from=builder /amdgpu-drivers-source/amd/amdxcp/amdxcp.ko /tmp/amdgpu-modules/amd/amdxcp/amdxcp.ko
COPY --from=builder /amdgpu-drivers-source/scheduler/amd-sched.ko /tmp/amdgpu-modules/scheduler/amd-sched.ko
COPY --from=builder /amdgpu-drivers-source/ttm/amdttm.ko /tmp/amdgpu-modules/ttm/amdttm.ko
COPY --from=builder /amdgpu-drivers-source/amddrm_buddy.ko /tmp/amdgpu-modules/amddrm_buddy.ko
COPY --from=builder /amdgpu-drivers-source/amddrm_ttm_helper.ko /tmp/amdgpu-modules/amddrm_ttm_helper.ko
# Resolve the installed kernel from the kernel-core package, drop the in-tree
# amdgpu module so it cannot shadow the new one, merge the staged modules into
# /lib/modules/<version>/ and rebuild the module dependency index.
RUN KERNEL_VERSION=$(rpm -q --qf "%{VERSION}-%{RELEASE}.%{ARCH}" kernel-core) \
    && rm "/lib/modules/${KERNEL_VERSION}/kernel/drivers/gpu/drm/amd/amdgpu/amdgpu.ko.xz" \
    && cp -a /tmp/amdgpu-modules/. "/lib/modules/${KERNEL_VERSION}/" \
    && rm -rf /tmp/amdgpu-modules \
    && depmod "${KERNEL_VERSION}"
# Firmware taken from the "sources" stage, placed in the default firmware path.
COPY --from=sources /lib/firmware/updates/amdgpu /lib/firmware/amdgpu

# Repository definition for the ROCm packages. It is a plain local file, so
# COPY is used; ADD's archive/URL handling is not needed.
COPY rocm.repo /etc/yum.repos.d/rocm.repo

# Optional extra RPM packages supplied at build time; empty by default.
# Presumably consumed by the dnf install that follows (confirm).
ARG EXTRA_RPM_PACKAGES=''
RUN mv /etc/selinux /etc/selinux.tmp && \
RUN mv /etc/selinux /etc/selinux.tmp && \
dnf install -y \
cloud-init \
pciutils \
Expand Down Expand Up @@ -46,7 +82,7 @@ ARG SSHPUBKEY
# When SSHPUBKEY is supplied, store it as root's key under /usr/ssh and tell
# sshd to look there in addition to the usual per-user authorized_keys files.
# The key is written exactly once and expanded inside double quotes so that it
# is not subject to word splitting or globbing.
RUN if [ -n "${SSHPUBKEY}" ]; then \
    set -eu; mkdir -p /usr/ssh && \
    echo 'AuthorizedKeysFile /usr/ssh/%u.keys .ssh/authorized_keys .ssh/authorized_keys2' >> /etc/ssh/sshd_config.d/30-auth-system.conf && \
    printf '%s\n' "${SSHPUBKEY}" > /usr/ssh/root.keys && chmod 0600 /usr/ssh/root.keys; \
    fi

# Substitute the __REPLACE_TRAIN_DEVICE__ placeholder in /usr/bin/ilab with "cuda".
# NOTE(review): presumably right for AMD because ROCm builds of PyTorch address
# the GPU through the "cuda" device name; confirm.
RUN sed --in-place --expression='s/__REPLACE_TRAIN_DEVICE__/cuda/' /usr/bin/ilab
Expand Down
7 changes: 7 additions & 0 deletions training/amd-bootc/amdgpu.repo-6.1.2
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# dnf repository for the AMD GPU driver packages, release 6.1.2.
# NOTE(review): baseurl is pinned to the rhel/9.4 x86_64 tree; confirm it
# matches the base image this repo file is installed into.
[amdgpu]
name=amdgpu
baseurl=https://repo.radeon.com/amdgpu/6.1.2/rhel/9.4/main/x86_64/
enabled=1
priority=50
# Package signatures are verified against the key referenced below.
gpgcheck=1
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
6 changes: 3 additions & 3 deletions training/amd-bootc/rocm.repo
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[ROCm-6.0.2]
name=ROCm6.0.2
baseurl=https://repo.radeon.com/rocm/rhel$releasever/6.0.2/main
[ROCm-6.1.2]
name=ROCm6.1.2
baseurl=https://repo.radeon.com/rocm/rhel9/6.1.2/main
enabled=1
priority=50
gpgcheck=1
Expand Down

0 comments on commit 062841f

Please sign in to comment.