From 83774ad37860cc1554eb332d56206fb8d90a837b Mon Sep 17 00:00:00 2001 From: tylertitsworth Date: Wed, 3 Jul 2024 15:53:25 -0700 Subject: [PATCH] fix lint issues Signed-off-by: tylertitsworth --- pytorch/Dockerfile | 8 ++++---- pytorch/README.md | 6 ++++-- pytorch/{ => multinode}/dockerd-entrypoint.sh | 3 ++- pytorch/{ => multinode}/generate_ssh_keys.sh | 0 pytorch/{ => multinode}/ssh_config | 0 pytorch/{ => multinode}/sshd_config | 0 6 files changed, 10 insertions(+), 7 deletions(-) rename pytorch/{ => multinode}/dockerd-entrypoint.sh (92%) rename pytorch/{ => multinode}/generate_ssh_keys.sh (100%) rename pytorch/{ => multinode}/ssh_config (100%) rename pytorch/{ => multinode}/sshd_config (100%) diff --git a/pytorch/Dockerfile b/pytorch/Dockerfile index becf09d6..46bee019 100644 --- a/pytorch/Dockerfile +++ b/pytorch/Dockerfile @@ -108,7 +108,7 @@ RUN mkdir -p /var/run/sshd && \ ARG PYTHON_VERSION -COPY generate_ssh_keys.sh . +COPY multinode/generate_ssh_keys.sh . # modify generate_ssh_keys to be a helper script # print how to use helper script on bash startup @@ -117,9 +117,9 @@ RUN echo "source /usr/local/lib/python${PYTHON_VERSION}/dist-packages/oneccl_bin cat '/generate_ssh_keys.sh' >> ~/.startup && \ rm -rf /generate_ssh_keys.sh -COPY dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh -COPY sshd_config /etc/ssh/sshd_config -COPY ssh_config /etc/ssh/ssh_config +COPY multinode/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh +COPY multinode/sshd_config /etc/ssh/sshd_config +COPY multinode/ssh_config /etc/ssh/ssh_config RUN mkdir -p /licensing diff --git a/pytorch/README.md b/pytorch/README.md index 48191e21..b1778cf5 100644 --- a/pytorch/README.md +++ b/pytorch/README.md @@ -147,6 +147,7 @@ To add these files correctly please follow the steps described below. chmod 600 id_rsa config authorized_keys chown root:root id_rsa.pub id_rsa config authorized_keys ``` + 3. Setup hostfile. The hostfile is needed for running torch distributed using `ipexrun` utility. If you're not using `ipexrun` you can skip this step. ```txt @@ -154,6 +155,7 @@ To add these files correctly please follow the steps described below. ... ``` + 4. Now start the workers and execute DDP on the launcher. 1. Worker run command: @@ -168,7 +170,7 @@ To add these files correctly please follow the steps described below. bash -c '/usr/sbin/sshd -D' ``` - 3. Launcher run command: + 2. Launcher run command: ```bash docker run -it --rm \ @@ -181,7 +183,7 @@ To add these files correctly please follow the steps described below. bash -c 'ipexrun cpu --nnodes 2 --nprocs-per-node 1 --master-addr 127.0.0.1 --master-port 3022 /workspace/tests/ipex-resnet50.py --ipex --device cpu --backend ccl' ``` -4. Start SSH server with a custom port. +5. Start SSH server with a custom port. If the user wants to define their own port to start the SSH server, it can be done so using the commands described below. 1. Worker command: diff --git a/pytorch/dockerd-entrypoint.sh b/pytorch/multinode/dockerd-entrypoint.sh similarity index 92% rename from pytorch/dockerd-entrypoint.sh rename to pytorch/multinode/dockerd-entrypoint.sh index 6b7de790..ba13c0f9 100755 --- a/pytorch/dockerd-entrypoint.sh +++ b/pytorch/multinode/dockerd-entrypoint.sh @@ -15,6 +15,7 @@ set -e set -a -source ~/.startup +# shellcheck disable=SC1091 +source "$HOME/.startup" set +a "$@" diff --git a/pytorch/generate_ssh_keys.sh b/pytorch/multinode/generate_ssh_keys.sh similarity index 100% rename from pytorch/generate_ssh_keys.sh rename to pytorch/multinode/generate_ssh_keys.sh diff --git a/pytorch/ssh_config b/pytorch/multinode/ssh_config similarity index 100% rename from pytorch/ssh_config rename to pytorch/multinode/ssh_config diff --git a/pytorch/sshd_config b/pytorch/multinode/sshd_config similarity index 100% rename from pytorch/sshd_config rename to pytorch/multinode/sshd_config