diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml
index 550defff7814d..5cc990902953b 100644
--- a/.github/workflows/cicd-main.yml
+++ b/.github/workflows/cicd-main.yml
@@ -73,7 +73,8 @@ jobs:
     - name: Container setup
       run: |
         # Pull base PyTorch container
-        docker pull nvcr.io/nvidia/pytorch:24.01-py3
-        docker run --device=/dev/nvidia0 --gpus all --shm-size=8g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --volume ${{ github.workspace }}/${{ github.run_id }}:/workspace --volume /mnt/datadrive/TestData:/home/TestData nvcr.io/nvidia/pytorch:24.01-py3 /bin/bash -c '
+        docker pull nvcr.io/nvidia/pytorch:24.02-py3
+        # Run the setup in the image pulled above; keep both image tags in sync
+        docker run --device=/dev/nvidia0 --gpus all --shm-size=8g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --volume ${{ github.workspace }}/${{ github.run_id }}:/workspace --volume /mnt/datadrive/TestData:/home/TestData nvcr.io/nvidia/pytorch:24.02-py3 /bin/bash -c '
         set -x
 
@@ -93,21 +94,19 @@ jobs:
         # NeMo Installation
         ./reinstall.sh release
 
-        # Transformer Engine 1.2.0
         # Transformer Engine installation
         git clone https://github.com/NVIDIA/TransformerEngine.git && \
         pushd TransformerEngine && \
-        git fetch origin 9b2fed514ea419141146f843ab2c84b22b86bfd7 && \
+        git fetch origin bfe21c3d68b0a9951e5716fb520045db53419c5e && \
         git checkout FETCH_HEAD && \
         git submodule init && git submodule update && \
         NVTE_FRAMEWORK=pytorch NVTE_WITH_USERBUFFERS=1 MPI_HOME=/usr/local/mpi pip install . && \
         popd
 
-        # Apex bugfix for PyTorch 23.11 container: https://github.com/NVIDIA/apex/pull/1760
         # Apex installation
         git clone https://github.com/NVIDIA/apex.git && \
         pushd apex && \
-        git checkout b496d85fb88a801d8e680872a12822de310951fd && \
+        git checkout 810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c && \
         cp -R apex /usr/local/lib/python3.10/dist-packages && \
         popd
 
@@ -116,12 +115,13 @@ jobs:
         # Megatron Core installation
         git clone https://github.com/NVIDIA/Megatron-LM.git && \
         pushd Megatron-LM && \
-        git checkout 43792028f003ed25a3ee8c5a0d4cad82317d81b5 && \
+        git checkout 7fe863f3d94f7b64a927b04b85f5c9339d3fb784 && \
         pip install . && \
         pushd megatron/core/datasets && \
         make && \
         popd && \
         popd
+        export PYTHONPATH="${PYTHONPATH}:/workspace/Megatron-LM"
 
         # Install only for test: L2: Segmentation Tool
         pushd tools/ctc_segmentation && \