From f209f7d666cfff36789014dd2f01aafc3f2eebe9 Mon Sep 17 00:00:00 2001 From: "wangguoteng.p" Date: Mon, 13 Feb 2023 13:30:58 +0800 Subject: [PATCH 1/7] feature(wgt): add torch-rpc dockerfile --- docker/Dockerfile.rpc | 147 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 docker/Dockerfile.rpc diff --git a/docker/Dockerfile.rpc b/docker/Dockerfile.rpc new file mode 100644 index 0000000000..5fa7880b3c --- /dev/null +++ b/docker/Dockerfile.rpc @@ -0,0 +1,147 @@ +# syntax = docker/dockerfile:experimental +# +# NOTE: To build this you will need a docker version > 18.06 with +# experimental enabled and DOCKER_BUILDKIT=1 +# +# If you do not use buildkit you are not going to have a good time +# +# For reference: +# https://docs.docker.com/develop/develop-images/build_enhancements/ +ARG BASE_IMAGE=ubuntu:18.04 +ARG PYTHON_VERSION=3.7.12 +ARG https_proxy=http://proxy.sensetime.com:3128 +ARG http_proxy=http://proxy.sensetime.com:3128 + +FROM ${BASE_IMAGE} as dev-base +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + ca-certificates \ + ccache \ + cmake \ + curl \ + git \ + libjpeg-dev \ + libpng-dev && \ + rm -rf /var/lib/apt/lists/* +RUN /usr/sbin/update-ccache-symlinks +RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache +ENV PATH /opt/conda/bin:$PATH + +FROM dev-base as conda +ARG https_proxy=http://proxy.sensetime.com:3128 +ARG http_proxy=http://proxy.sensetime.com:3128 +ARG PYTHON_VERSION=3.7.12 +# Automatically set by buildx +ARG TARGETPLATFORM +# translating Docker's TARGETPLATFORM into miniconda arches +RUN case ${TARGETPLATFORM} in \ + "linux/arm64") MINICONDA_ARCH=aarch64 ;; \ + *) MINICONDA_ARCH=x86_64 ;; \ + esac && \ + curl -fsSL -v -o ~/miniconda.sh -O "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${MINICONDA_ARCH}.sh" +COPY requirements.txt . 
+RUN chmod +x ~/miniconda.sh && \ + ~/miniconda.sh -b -p /opt/conda && \ + rm ~/miniconda.sh && \ + /opt/conda/bin/conda install -y python=${PYTHON_VERSION} cmake conda-build pyyaml numpy ipython && \ + /opt/conda/bin/python -mpip install -r requirements.txt && \ + /opt/conda/bin/conda clean -ya + +FROM dev-base as submodule-update +ARG https_proxy=http://proxy.sensetime.com:3128 +ARG http_proxy=http://proxy.sensetime.com:3128 +WORKDIR /opt/pytorch +COPY . . + +RUN git submodule update --init --recursive --jobs 0 && \ + cd third_party/tensorpipe && \ + git remote set-url origin https://github.com/SolenoidWGT/tensorpipe.git && \ + git pull origin k8s-env && \ + git checkout k8s-env + +FROM conda as build +ARG https_proxy=http://proxy.sensetime.com:3128 +ARG http_proxy=http://proxy.sensetime.com:3128 +WORKDIR /opt/pytorch +COPY --from=conda /opt/conda /opt/conda +COPY --from=submodule-update /opt/pytorch /opt/pytorch + +# we only build for 1080TI and A100 +RUN --mount=type=cache,target=/opt/ccache \ + TORCH_CUDA_ARCH_LIST="6.1 8.0" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \ + CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \ + python setup.py install + + +FROM build as dev +ARG https_proxy=http://proxy.sensetime.com:3128 +ARG http_proxy=http://proxy.sensetime.com:3128 +ARG OS_NAME=ubuntu18.04 +ARG MOFED_VER=5.1-0.6.6.0 + +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:UTF-8 +ENV LC_ALL en_US.UTF-8 + +# Should override the already installed version from the official-image stage +COPY --from=build /opt/conda /opt/conda +# COPY --from=build /opt/pytorch /opt/pytorch + +# Install IB driver and ibvlib +RUN apt-get update && \ + apt-get -y install apt-utils && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \ + build-essential tcsh tcl tk \ + cmake make git curl vim wget ca-certificates \ + iputils-ping net-tools ethtool \ + perl lsb-release python-libxml2 \ + iproute2 pciutils libnl-route-3-200 \ + 
kmod libnuma1 lsof openssh-server \
+        swig libelf1 automake libglib2.0-0 \
+        autoconf graphviz chrpath flex libnl-3-200 m4 \
+        debhelper autotools-dev gfortran libltdl-dev gdb && \
+    rm -rf /var/lib/apt/lists/*
+
+# we remove --without-neohost-backend --without-neohost-sdk and add --skip-unsupported-devices-check to avoid error
+RUN /opt/conda/bin/conda create -y -n python2 python=2.7 && \
+    alias python=/opt/conda/envs/python2/bin/python && \
+    cd ~ && \
+    wget https://content.mellanox.com/ofed/MLNX_OFED-${MOFED_VER}/MLNX_OFED_LINUX-${MOFED_VER}-${OS_NAME}-x86_64.tgz && \
+    tar -xvf MLNX_OFED_LINUX-${MOFED_VER}-${OS_NAME}-x86_64.tgz && \
+    MLNX_OFED_LINUX-${MOFED_VER}-${OS_NAME}-x86_64/mlnxofedinstall --user-space-only --without-fw-update --without-neohost-backend --without-neohost-sdk --skip-unsupported-devices-check -q && \
+    cd .. && \
+    rm -rf ~/MLNX_OFED_LINUX-${MOFED_VER}-${OS_NAME}-x86_64 && \
+    rm -rf ~/MLNX_OFED_LINUX-${MOFED_VER}-${OS_NAME}-x86_64.tgz && \
+    /opt/conda/bin/conda remove -y -n python2 --all && \
+    /opt/conda/bin/conda clean -ya
+
+RUN apt-get update \
+    && apt-get install -y libgl1-mesa-glx libglib2.0-0 libsm6 libxext6 libxrender-dev swig curl git vim gcc g++ make wget locales dnsutils \
+    && apt-get clean \
+    && rm -rf /var/cache/apt/* /var/lib/apt/lists/* \
+    && sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen \
+    && locale-gen
+
+RUN git clone https://github.com/SolenoidWGT/DI-engine.git && \
+    cd DI-engine/ && \
+    git pull origin rdma-dev && \
+    git checkout rdma-dev && \
+    sed -i "/torch/d" setup.py
+
+WORKDIR /opt/pytorch/DI-engine
+
+RUN /opt/conda/bin/python -m pip install --upgrade pip \
+    && /opt/conda/bin/python -m pip install --ignore-installed 'PyYAML<6.0' \
+    && /opt/conda/bin/python -m pip install --no-cache-dir '.[fast,test]'
+
+RUN git clone https://github.com/opendilab/DI-treetensor.git && \
+    cd DI-treetensor && \
+    sed -i '/torch/d' requirements.txt && \
+    /opt/conda/bin/python setup.py install && \
+    cd ..
&& rm -r ./DI-treetensor + +RUN /opt/conda/bin/python -m pip install --no-cache-dir .[common_env] \ + && /opt/conda/bin/pip install autorom \ + && AutoROM --accept-license + +RUN /opt/conda/bin/pip install -e . -i https://pkg.sensetime.com/repository/pypi-proxy/simple/ --trusted-host pkg.sensetime.com --user From 3f16758e89e104117e81a3e946c6b5930715b7c7 Mon Sep 17 00:00:00 2001 From: "wangguoteng.p" Date: Mon, 13 Feb 2023 13:30:58 +0800 Subject: [PATCH 2/7] feature(wgt): add torch-rpc fix dockerfile and add new rpc deploy CI-action --- .github/workflows/rpc_deploy.yml | 102 +++++++++++++++++++++ docker/Dockerfile.rpc | 23 +++++ docker/Dockerfile.torch | 147 +++++++++++++++++++++++++++++++ 3 files changed, 272 insertions(+) create mode 100644 .github/workflows/rpc_deploy.yml create mode 100644 docker/Dockerfile.rpc create mode 100644 docker/Dockerfile.torch diff --git a/.github/workflows/rpc_deploy.yml b/.github/workflows/rpc_deploy.yml new file mode 100644 index 0000000000..b63edbfc45 --- /dev/null +++ b/.github/workflows/rpc_deploy.yml @@ -0,0 +1,102 @@ +name: rpc_deploy # deploy docker + +on: + push: + branches: ['p2p-rpc-dockerfile'] + +jobs: + docker_base: + runs-on: ubuntu-latest + # if: "contains(github.event.head_commit.message, 'enable docker')" + strategy: + matrix: + platform: [linux/amd64] + # python-version: [3.6, 3.7, 3.8] + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Get current date + id: date + run: echo "::set-output name=date::$(date +'%Y-%m-%d')" + + - name: Prepare + id: prep + env: + DOCKERIO_ORG: opendilab + TARGET: ding + DATE: ${{ steps.date.outputs.date }} + run: | + DOCKER_IMAGE=$DOCKERIO_ORG/$TARGET + VERSION=$DATE + if [[ $GITHUB_REF == refs/tags/* ]]; then + VERSION=${GITHUB_REF#refs/tags/} + fi + TAGS="${DOCKER_IMAGE}:${VERSION}" + NIGHTLY_TAGS="${DOCKER_IMAGE}:nightly" + if [[ $VERSION =~ ^v[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then + TAGS="$TAGS,${DOCKER_IMAGE}:latest" + fi + echo ::set-output name=tags::${TAGS} 
+ echo ::set-output name=nightlytags::${NIGHTLY_TAGS} + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + with: + platforms: all + + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v1 + + - name: Cache Docker layers + uses: actions/cache@v2 + with: + path: /tmp/.buildx-cache + key: ${{ runner.os }}-buildx-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-buildx- + - name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKERIO_USERNAME }} + password: ${{ secrets.DOCKERIO_PASSWORD }} + + - name: Build and push + id: docker_build + uses: docker/build-push-action@v2 + with: + builder: ${{ steps.buildx.outputs.name }} + context: ./ + file: ./docker/Dockerfile.base + push: ${{ github.event_name != 'pull_request' }} + tags: | + ${{ steps.prep.outputs.tags }} + ${{ steps.prep.outputs.nightlytags }} + cache-from: type=local,src=/tmp/.buildx-cache + cache-to: type=local,dest=/tmp/.buildx-cache + target: base + + - name: Image digest + run: echo ${{ steps.docker_build.outputs.digest }} + + docker_rpc: + runs-on: ubuntu-latest + needs: docker_base + # if: "contains(github.event.head_commit.message, 'dmc2gym docker')" + strategy: + matrix: + platform: [linux/amd64] + # python-version: [3.6, 3.7, 3.8] + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKERIO_USERNAME }} + password: ${{ secrets.DOCKERIO_PASSWORD }} + + - name: Build and push + id: docker_build + run: | + docker buildx build -f ./docker/Dockerfile.rpc . 
-t opendilab/ding:nightly-rpc-base --target=rpc
+          docker push opendilab/ding:nightly-rpc-base
diff --git a/docker/Dockerfile.rpc b/docker/Dockerfile.rpc
new file mode 100644
index 0000000000..b9e9496548
--- /dev/null
+++ b/docker/Dockerfile.rpc
@@ -0,0 +1,30 @@
+# DI-engine image layered on the snsao/pytorch:tensorpipe-fix base image.
+FROM snsao/pytorch:tensorpipe-fix AS base
+
+WORKDIR /ding
+
+# OS packages; apt-get (not apt) is used because it is the stable CLI for scripts.
+# The sed call uncomments the en_US.UTF-8 entry in /etc/locale.gen before locale-gen runs.
+RUN apt-get update \
+    && apt-get install -y libgl1-mesa-glx libglib2.0-0 libsm6 libxext6 libxrender-dev swig curl git vim gcc g++ make wget locales dnsutils \
+    && apt-get clean \
+    && rm -rf /var/cache/apt/* /var/lib/apt/lists/* \
+    && sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen \
+    && locale-gen
+
+ENV LANG=en_US.UTF-8
+ENV LANGUAGE=en_US:UTF-8
+ENV LC_ALL=en_US.UTF-8
+
+COPY setup.py setup.py
+COPY dizoo dizoo
+COPY ding ding
+COPY README.md README.md
+
+# The extras spec is quoted so the shell cannot glob-expand the brackets.
+RUN python3 -m pip install --no-cache-dir --upgrade pip \
+    && python3 -m pip install --no-cache-dir --ignore-installed 'PyYAML<6.0' \
+    && python3 -m pip install --no-cache-dir '.[fast,test]'
+
+# Alias stage: the deploy workflow builds this file with --target=rpc.
+FROM base AS rpc
diff --git a/docker/Dockerfile.torch b/docker/Dockerfile.torch
new file mode 100644
index 0000000000..5fa7880b3c
--- /dev/null
+++ b/docker/Dockerfile.torch
@@ -0,0 +1,147 @@
+# syntax = docker/dockerfile:experimental
+#
+# NOTE: To build this you will need a docker version > 18.06 with
+#       experimental enabled and DOCKER_BUILDKIT=1
+#
+#       If you do not use buildkit you are not going to have a good time
+#
+#       For reference:
+#           https://docs.docker.com/develop/develop-images/build_enhancements/
+ARG BASE_IMAGE=ubuntu:18.04
+ARG PYTHON_VERSION=3.7.12
+ARG https_proxy=http://proxy.sensetime.com:3128
+ARG http_proxy=http://proxy.sensetime.com:3128
+
+FROM ${BASE_IMAGE} as dev-base
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        build-essential \
+        ca-certificates \
+        ccache \
+        cmake \
+        curl \
+        git \
+        libjpeg-dev \
+        libpng-dev && \
+    rm -rf /var/lib/apt/lists/*
+RUN /usr/sbin/update-ccache-symlinks
+RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache
+ENV PATH /opt/conda/bin:$PATH
+
+FROM dev-base as conda
+ARG
https_proxy=http://proxy.sensetime.com:3128 +ARG http_proxy=http://proxy.sensetime.com:3128 +ARG PYTHON_VERSION=3.7.12 +# Automatically set by buildx +ARG TARGETPLATFORM +# translating Docker's TARGETPLATFORM into miniconda arches +RUN case ${TARGETPLATFORM} in \ + "linux/arm64") MINICONDA_ARCH=aarch64 ;; \ + *) MINICONDA_ARCH=x86_64 ;; \ + esac && \ + curl -fsSL -v -o ~/miniconda.sh -O "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${MINICONDA_ARCH}.sh" +COPY requirements.txt . +RUN chmod +x ~/miniconda.sh && \ + ~/miniconda.sh -b -p /opt/conda && \ + rm ~/miniconda.sh && \ + /opt/conda/bin/conda install -y python=${PYTHON_VERSION} cmake conda-build pyyaml numpy ipython && \ + /opt/conda/bin/python -mpip install -r requirements.txt && \ + /opt/conda/bin/conda clean -ya + +FROM dev-base as submodule-update +ARG https_proxy=http://proxy.sensetime.com:3128 +ARG http_proxy=http://proxy.sensetime.com:3128 +WORKDIR /opt/pytorch +COPY . . + +RUN git submodule update --init --recursive --jobs 0 && \ + cd third_party/tensorpipe && \ + git remote set-url origin https://github.com/SolenoidWGT/tensorpipe.git && \ + git pull origin k8s-env && \ + git checkout k8s-env + +FROM conda as build +ARG https_proxy=http://proxy.sensetime.com:3128 +ARG http_proxy=http://proxy.sensetime.com:3128 +WORKDIR /opt/pytorch +COPY --from=conda /opt/conda /opt/conda +COPY --from=submodule-update /opt/pytorch /opt/pytorch + +# we only build for 1080TI and A100 +RUN --mount=type=cache,target=/opt/ccache \ + TORCH_CUDA_ARCH_LIST="6.1 8.0" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \ + CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \ + python setup.py install + + +FROM build as dev +ARG https_proxy=http://proxy.sensetime.com:3128 +ARG http_proxy=http://proxy.sensetime.com:3128 +ARG OS_NAME=ubuntu18.04 +ARG MOFED_VER=5.1-0.6.6.0 + +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:UTF-8 +ENV LC_ALL en_US.UTF-8 + +# Should override the already installed version from the official-image stage 
+COPY --from=build /opt/conda /opt/conda +# COPY --from=build /opt/pytorch /opt/pytorch + +# Install IB driver and ibvlib +RUN apt-get update && \ + apt-get -y install apt-utils && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \ + build-essential tcsh tcl tk \ + cmake make git curl vim wget ca-certificates \ + iputils-ping net-tools ethtool \ + perl lsb-release python-libxml2 \ + iproute2 pciutils libnl-route-3-200 \ + kmod libnuma1 lsof openssh-server \ + swig libelf1 automake libglib2.0-0 \ + autoconf graphviz chrpath flex libnl-3-200 m4 \ + debhelper autotools-dev gfortran libltdl-dev gdb && \ + rm -rf /rm -rf /var/lib/apt/lists/* + +# we remove --without-neohost-backend --without-neohost-sdk and add --skip-unsupported-devices-check to avoid error +RUN /opt/conda/bin/conda create -n python2 python=2.7 && \ + alias python=/opt/conda/envs/python2/bin/python && \ + cd ~ && \ + wget https://content.mellanox.com/ofed/MLNX_OFED-${MOFED_VER}/MLNX_OFED_LINUX-${MOFED_VER}-${OS_NAME}-x86_64.tgz && \ + tar -xvf MLNX_OFED_LINUX-${MOFED_VER}-${OS_NAME}-x86_64.tgz && \ + MLNX_OFED_LINUX-${MOFED_VER}-${OS_NAME}-x86_64/mlnxofedinstall --user-space-only --without-fw-update --without-neohost-backend --without-neohost-sdk --skip-unsupported-devices-check -q && \ + cd .. 
&& \ + rm -rf ${MOFED_DIR} && \ + rm -rf *.tgz && \ + /opt/conda/bin/conda remove -n python2 --all && \ + /opt/conda/bin/conda clean -ya + +RUN apt update \ + && apt install libgl1-mesa-glx libglib2.0-0 libsm6 libxext6 libxrender-dev swig curl git vim gcc \g++ make wget locales dnsutils -y \ + && apt clean \ + && rm -rf /var/cache/apt/* \ + && sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen \ + && locale-gen + +RUN git clone https://github.com/SolenoidWGT/DI-engine.git && \ + cd DI-engine/ && \ + git pull origin rdma-dev && \ + git checkout rdma-dev && \ + sed -i "/torch/d" setup.py + +WORKDIR /opt/pytorch/DI-engine + +RUN /opt/conda/bin/python -m pip install --upgrade pip \ + && /opt/conda/bin/python -m pip install --ignore-installed 'PyYAML<6.0' \ + && /opt/conda/bin/python -m pip install --no-cache-dir .[fast,test] + +RUN git clone https://github.com/opendilab/DI-treetensor.git && \ + cd DI-treetensor && \ + sed -i '/torch/d' requirements.txt && \ + /opt/conda/bin/python setup.py install && \ + cd .. && rm -r ./DI-treetensor + +RUN /opt/conda/bin/python -m pip install --no-cache-dir .[common_env] \ + && /opt/conda/bin/pip install autorom \ + && AutoROM --accept-license + +RUN /opt/conda/bin/pip install -e . -i https://pkg.sensetime.com/repository/pypi-proxy/simple/ --trusted-host pkg.sensetime.com --user From d1a568a3f197f23ba6b05416c267265d12c77bd8 Mon Sep 17 00:00:00 2001 From: SolenoidWGT <877825076@qq.com> Date: Thu, 9 Mar 2023 15:11:03 +0800 Subject: [PATCH 3/7] test_ci --- .github/workflows/rpc_deploy.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/rpc_deploy.yml b/.github/workflows/rpc_deploy.yml index b63edbfc45..16396ee229 100644 --- a/.github/workflows/rpc_deploy.yml +++ b/.github/workflows/rpc_deploy.yml @@ -100,3 +100,4 @@ jobs: run: | docker buildx build -f ./docker/Dockerfile.rpc . 
-t opendilab/ding:nightly-rpc-base --target=rpc docker push opendilab/ding:nightly-rpc-base + From bb3ffdb62db14726bc283e82da05e959a0313027 Mon Sep 17 00:00:00 2001 From: SolenoidWGT <877825076@qq.com> Date: Thu, 9 Mar 2023 16:28:38 +0800 Subject: [PATCH 4/7] Add the rpc image build logic to deploy.yml --- .github/workflows/deploy.yml | 23 +++++ .github/workflows/rpc_deploy.yml | 103 ---------------------- docker/Dockerfile.torch | 147 ------------------------------- 3 files changed, 23 insertions(+), 250 deletions(-) delete mode 100644 .github/workflows/rpc_deploy.yml delete mode 100644 docker/Dockerfile.torch diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 5246076d78..c7cb0b8f1d 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -386,3 +386,26 @@ jobs: run: | docker buildx build -f ./docker/Dockerfile.env . -t opendilab/ding:nightly-dmc2gym --target=dmc2gym docker push opendilab/ding:nightly-dmc2gym + + docker_rpc: + runs-on: ubuntu-latest + needs: docker_base + # if: "contains(github.event.head_commit.message, 'dmc2gym docker')" + strategy: + matrix: + platform: [linux/amd64] + # python-version: [3.6, 3.7, 3.8] + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKERIO_USERNAME }} + password: ${{ secrets.DOCKERIO_PASSWORD }} + + - name: Build and push + id: docker_build + run: | + docker buildx build -f ./docker/Dockerfile.rpc . 
-t opendilab/ding:nightly-rpc-base --target=rpc + docker push opendilab/ding:nightly-rpc-base diff --git a/.github/workflows/rpc_deploy.yml b/.github/workflows/rpc_deploy.yml deleted file mode 100644 index 16396ee229..0000000000 --- a/.github/workflows/rpc_deploy.yml +++ /dev/null @@ -1,103 +0,0 @@ -name: rpc_deploy # deploy docker - -on: - push: - branches: ['p2p-rpc-dockerfile'] - -jobs: - docker_base: - runs-on: ubuntu-latest - # if: "contains(github.event.head_commit.message, 'enable docker')" - strategy: - matrix: - platform: [linux/amd64] - # python-version: [3.6, 3.7, 3.8] - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Get current date - id: date - run: echo "::set-output name=date::$(date +'%Y-%m-%d')" - - - name: Prepare - id: prep - env: - DOCKERIO_ORG: opendilab - TARGET: ding - DATE: ${{ steps.date.outputs.date }} - run: | - DOCKER_IMAGE=$DOCKERIO_ORG/$TARGET - VERSION=$DATE - if [[ $GITHUB_REF == refs/tags/* ]]; then - VERSION=${GITHUB_REF#refs/tags/} - fi - TAGS="${DOCKER_IMAGE}:${VERSION}" - NIGHTLY_TAGS="${DOCKER_IMAGE}:nightly" - if [[ $VERSION =~ ^v[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then - TAGS="$TAGS,${DOCKER_IMAGE}:latest" - fi - echo ::set-output name=tags::${TAGS} - echo ::set-output name=nightlytags::${NIGHTLY_TAGS} - - name: Set up QEMU - uses: docker/setup-qemu-action@v1 - with: - platforms: all - - - name: Set up Docker Buildx - id: buildx - uses: docker/setup-buildx-action@v1 - - - name: Cache Docker layers - uses: actions/cache@v2 - with: - path: /tmp/.buildx-cache - key: ${{ runner.os }}-buildx-${{ github.sha }} - restore-keys: | - ${{ runner.os }}-buildx- - - name: Login to DockerHub - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKERIO_USERNAME }} - password: ${{ secrets.DOCKERIO_PASSWORD }} - - - name: Build and push - id: docker_build - uses: docker/build-push-action@v2 - with: - builder: ${{ steps.buildx.outputs.name }} - context: ./ - file: ./docker/Dockerfile.base - push: ${{ 
github.event_name != 'pull_request' }} - tags: | - ${{ steps.prep.outputs.tags }} - ${{ steps.prep.outputs.nightlytags }} - cache-from: type=local,src=/tmp/.buildx-cache - cache-to: type=local,dest=/tmp/.buildx-cache - target: base - - - name: Image digest - run: echo ${{ steps.docker_build.outputs.digest }} - - docker_rpc: - runs-on: ubuntu-latest - needs: docker_base - # if: "contains(github.event.head_commit.message, 'dmc2gym docker')" - strategy: - matrix: - platform: [linux/amd64] - # python-version: [3.6, 3.7, 3.8] - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Login to DockerHub - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKERIO_USERNAME }} - password: ${{ secrets.DOCKERIO_PASSWORD }} - - - name: Build and push - id: docker_build - run: | - docker buildx build -f ./docker/Dockerfile.rpc . -t opendilab/ding:nightly-rpc-base --target=rpc - docker push opendilab/ding:nightly-rpc-base - diff --git a/docker/Dockerfile.torch b/docker/Dockerfile.torch deleted file mode 100644 index 5fa7880b3c..0000000000 --- a/docker/Dockerfile.torch +++ /dev/null @@ -1,147 +0,0 @@ -# syntax = docker/dockerfile:experimental -# -# NOTE: To build this you will need a docker version > 18.06 with -# experimental enabled and DOCKER_BUILDKIT=1 -# -# If you do not use buildkit you are not going to have a good time -# -# For reference: -# https://docs.docker.com/develop/develop-images/build_enhancements/ -ARG BASE_IMAGE=ubuntu:18.04 -ARG PYTHON_VERSION=3.7.12 -ARG https_proxy=http://proxy.sensetime.com:3128 -ARG http_proxy=http://proxy.sensetime.com:3128 - -FROM ${BASE_IMAGE} as dev-base -RUN apt-get update && apt-get install -y --no-install-recommends \ - build-essential \ - ca-certificates \ - ccache \ - cmake \ - curl \ - git \ - libjpeg-dev \ - libpng-dev && \ - rm -rf /var/lib/apt/lists/* -RUN /usr/sbin/update-ccache-symlinks -RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache -ENV PATH /opt/conda/bin:$PATH - -FROM dev-base as 
conda -ARG https_proxy=http://proxy.sensetime.com:3128 -ARG http_proxy=http://proxy.sensetime.com:3128 -ARG PYTHON_VERSION=3.7.12 -# Automatically set by buildx -ARG TARGETPLATFORM -# translating Docker's TARGETPLATFORM into miniconda arches -RUN case ${TARGETPLATFORM} in \ - "linux/arm64") MINICONDA_ARCH=aarch64 ;; \ - *) MINICONDA_ARCH=x86_64 ;; \ - esac && \ - curl -fsSL -v -o ~/miniconda.sh -O "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${MINICONDA_ARCH}.sh" -COPY requirements.txt . -RUN chmod +x ~/miniconda.sh && \ - ~/miniconda.sh -b -p /opt/conda && \ - rm ~/miniconda.sh && \ - /opt/conda/bin/conda install -y python=${PYTHON_VERSION} cmake conda-build pyyaml numpy ipython && \ - /opt/conda/bin/python -mpip install -r requirements.txt && \ - /opt/conda/bin/conda clean -ya - -FROM dev-base as submodule-update -ARG https_proxy=http://proxy.sensetime.com:3128 -ARG http_proxy=http://proxy.sensetime.com:3128 -WORKDIR /opt/pytorch -COPY . . - -RUN git submodule update --init --recursive --jobs 0 && \ - cd third_party/tensorpipe && \ - git remote set-url origin https://github.com/SolenoidWGT/tensorpipe.git && \ - git pull origin k8s-env && \ - git checkout k8s-env - -FROM conda as build -ARG https_proxy=http://proxy.sensetime.com:3128 -ARG http_proxy=http://proxy.sensetime.com:3128 -WORKDIR /opt/pytorch -COPY --from=conda /opt/conda /opt/conda -COPY --from=submodule-update /opt/pytorch /opt/pytorch - -# we only build for 1080TI and A100 -RUN --mount=type=cache,target=/opt/ccache \ - TORCH_CUDA_ARCH_LIST="6.1 8.0" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \ - CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \ - python setup.py install - - -FROM build as dev -ARG https_proxy=http://proxy.sensetime.com:3128 -ARG http_proxy=http://proxy.sensetime.com:3128 -ARG OS_NAME=ubuntu18.04 -ARG MOFED_VER=5.1-0.6.6.0 - -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US:UTF-8 -ENV LC_ALL en_US.UTF-8 - -# Should override the already installed version from the official-image 
stage -COPY --from=build /opt/conda /opt/conda -# COPY --from=build /opt/pytorch /opt/pytorch - -# Install IB driver and ibvlib -RUN apt-get update && \ - apt-get -y install apt-utils && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \ - build-essential tcsh tcl tk \ - cmake make git curl vim wget ca-certificates \ - iputils-ping net-tools ethtool \ - perl lsb-release python-libxml2 \ - iproute2 pciutils libnl-route-3-200 \ - kmod libnuma1 lsof openssh-server \ - swig libelf1 automake libglib2.0-0 \ - autoconf graphviz chrpath flex libnl-3-200 m4 \ - debhelper autotools-dev gfortran libltdl-dev gdb && \ - rm -rf /rm -rf /var/lib/apt/lists/* - -# we remove --without-neohost-backend --without-neohost-sdk and add --skip-unsupported-devices-check to avoid error -RUN /opt/conda/bin/conda create -n python2 python=2.7 && \ - alias python=/opt/conda/envs/python2/bin/python && \ - cd ~ && \ - wget https://content.mellanox.com/ofed/MLNX_OFED-${MOFED_VER}/MLNX_OFED_LINUX-${MOFED_VER}-${OS_NAME}-x86_64.tgz && \ - tar -xvf MLNX_OFED_LINUX-${MOFED_VER}-${OS_NAME}-x86_64.tgz && \ - MLNX_OFED_LINUX-${MOFED_VER}-${OS_NAME}-x86_64/mlnxofedinstall --user-space-only --without-fw-update --without-neohost-backend --without-neohost-sdk --skip-unsupported-devices-check -q && \ - cd .. 
&& \ - rm -rf ${MOFED_DIR} && \ - rm -rf *.tgz && \ - /opt/conda/bin/conda remove -n python2 --all && \ - /opt/conda/bin/conda clean -ya - -RUN apt update \ - && apt install libgl1-mesa-glx libglib2.0-0 libsm6 libxext6 libxrender-dev swig curl git vim gcc \g++ make wget locales dnsutils -y \ - && apt clean \ - && rm -rf /var/cache/apt/* \ - && sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen \ - && locale-gen - -RUN git clone https://github.com/SolenoidWGT/DI-engine.git && \ - cd DI-engine/ && \ - git pull origin rdma-dev && \ - git checkout rdma-dev && \ - sed -i "/torch/d" setup.py - -WORKDIR /opt/pytorch/DI-engine - -RUN /opt/conda/bin/python -m pip install --upgrade pip \ - && /opt/conda/bin/python -m pip install --ignore-installed 'PyYAML<6.0' \ - && /opt/conda/bin/python -m pip install --no-cache-dir .[fast,test] - -RUN git clone https://github.com/opendilab/DI-treetensor.git && \ - cd DI-treetensor && \ - sed -i '/torch/d' requirements.txt && \ - /opt/conda/bin/python setup.py install && \ - cd .. && rm -r ./DI-treetensor - -RUN /opt/conda/bin/python -m pip install --no-cache-dir .[common_env] \ - && /opt/conda/bin/pip install autorom \ - && AutoROM --accept-license - -RUN /opt/conda/bin/pip install -e . 
-i https://pkg.sensetime.com/repository/pypi-proxy/simple/ --trusted-host pkg.sensetime.com --user From 99416f2cbc841ff45a4e41936c52348dc7a60d99 Mon Sep 17 00:00:00 2001 From: SolenoidWGT <877825076@qq.com> Date: Wed, 15 Mar 2023 14:19:51 +0800 Subject: [PATCH 5/7] add pull_request action for docker branch --- .github/workflows/deploy.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index c7cb0b8f1d..2ffcc07137 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -3,7 +3,8 @@ name: deploy # deploy docker on: push: branches: [main, '*deploy*', '*docker*'] - + pull_request: + branches: ['*docker*'] jobs: docker_base: runs-on: ubuntu-latest From bfa506ed053ba226ae097ae97c60a8c7d469d950 Mon Sep 17 00:00:00 2001 From: SolenoidWGT <877825076@qq.com> Date: Wed, 15 Mar 2023 14:24:06 +0800 Subject: [PATCH 6/7] add ci skip --- .github/workflows/deploy.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index f2f28a3006..7fe831932b 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -4,7 +4,7 @@ on: push: branches: [main, '*deploy*', '*docker*'] pull_request: - branches: ['*docker*'] + branches: [main, '*docker*'] jobs: docker_base: runs-on: ubuntu-latest @@ -391,7 +391,7 @@ jobs: docker_rpc: runs-on: ubuntu-latest needs: docker_base - # if: "contains(github.event.head_commit.message, 'dmc2gym docker')" + if: "!contains(github.event.head_commit.message, 'ci skip')" strategy: matrix: platform: [linux/amd64] @@ -433,7 +433,7 @@ jobs: run: | docker buildx build -f ./docker/Dockerfile.env . 
-t opendilab/ding:nightly-evogym --target=evogym docker push opendilab/ding:nightly-evogym - + docker_d4rl: runs-on: ubuntu-latest needs: docker_mujoco From 46954d824b34699109de8b28294cd9d200d3aa38 Mon Sep 17 00:00:00 2001 From: "wangguoteng.p" Date: Thu, 30 Mar 2023 23:40:16 +0800 Subject: [PATCH 7/7] fix: test env in ci action yml[test rpc] --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 7fe831932b..17789d7730 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -391,7 +391,7 @@ jobs: docker_rpc: runs-on: ubuntu-latest needs: docker_base - if: "!contains(github.event.head_commit.message, 'ci skip')" + if: "contains(github.event.head_commit.message, 'test rpc')" strategy: matrix: platform: [linux/amd64]