From d100cc0b60523ca9455b1aff02238ce75fb7c57a Mon Sep 17 00:00:00 2001 From: Lars Ludvigsen Date: Sun, 31 Mar 2024 19:37:43 +0200 Subject: [PATCH 01/10] Slimmed down prepare --- bin/prepare.sh | 83 +++++++++++++++++++++----------------------------- 1 file changed, 34 insertions(+), 49 deletions(-) diff --git a/bin/prepare.sh b/bin/prepare.sh index eb2975e9..d64f8e4c 100755 --- a/bin/prepare.sh +++ b/bin/prepare.sh @@ -10,9 +10,9 @@ function ctrl_c() { DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" ## Patch system -sudo apt-get update && sudo apt-mark hold grub-pc && sudo DEBIAN_FRONTEND=noninteractive apt-get -y -o \ +sudo apt update && sudo apt-mark hold grub-pc && sudo DEBIAN_FRONTEND=noninteractive apt -y -o \ DPkg::options::="--force-confdef" -o DPkg::options::="--force-confold" -qq --force-yes upgrade && - sudo apt-get install --no-install-recommends -y jq + sudo apt install --no-install-recommends -y jq awscli python3-boto3 source $DIR/detect.sh echo "Detected cloud type ${CLOUD_NAME}" @@ -25,63 +25,42 @@ else ARCH="gpu" fi -## Do I have an additional disk for Docker images - looking for /dev/sdc (Azure) +## Check distribution +distribution=$( + . /etc/os-release + echo $ID$VERSION_ID | sed 's/\.//' +) -if [[ "${CLOUD_NAME}" == "azure" ]]; then - ADDL_DISK=$(lsblk | awk '/^sdc/ {print $1}') - ADDL_PART=$(lsblk -l | awk -v DISK="$ADDL_DISK" '($0 ~ DISK) && ($0 ~ /part/) {print $1}') - - if [ -n "$ADDL_DISK" ] && [ -z "$ADDL_PART" ]; then - echo "Found $ADDL_DISK, preparing it for use" - echo -e "g\nn\np\n1\n\n\nw\n" | sudo fdisk /dev/$ADDL_DISK - sleep 1s - ADDL_DEVICE=$(echo "/dev/"$ADDL_DISK"1") - sudo mkfs.ext4 $ADDL_DEVICE - sudo mkdir -p /var/lib/docker - echo "$ADDL_DEVICE /var/lib/docker ext4 rw,user,auto 0 0" | sudo tee -a /etc/fstab - mount /var/lib/docker - if [ $? -ne 0 ]; then - echo "Error during preparing of additional disk. Exiting." - exit 1 - fi - elif [ -n "$ADDL_DISK" ] && [ -n "$ADDL_PART" ]; then - echo "Found $ADDL_DISK - $ADDL_PART already mounted. Installing into present drive/directory structure." - - else - echo "Did not find $ADDL_DISK. Installing into present drive/directory structure." - fi +## Check if WSL2 +if grep -q Microsoft /proc/version && grep -q "WSL2" /proc/version; then + IS_WSL2="yes" fi ## Adding Nvidia Drivers -if [[ "${ARCH}" == "gpu" ]]; then - distribution=$( - . /etc/os-release - echo $ID$VERSION_ID | sed 's/\.//' - ) - curl -sS https://developer.download.nvidia.com/compute/cuda/repos/$distribution/x86_64/3bf863cc.pub | sudo gpg --dearmour -o /etc/apt/trusted.gpg.d/nvidia-cuda.gpg - curl -sS https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64/7fa2af80.pub | sudo gpg --dearmour -o /etc/apt/trusted.gpg.d/nvidia-machine-learning.gpg - echo "deb http://developer.download.nvidia.com/compute/cuda/repos/$distribution/x86_64 /" | sudo tee /etc/apt/sources.list.d/cuda.list - echo "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64 /" | sudo tee /etc/apt/sources.list.d/cuda_learn.list - sudo apt update && sudo apt install -y nvidia-driver-470-server cuda-minimal-build-11-4 --no-install-recommends -o Dpkg::Options::="--force-overwrite" +if [[ "${ARCH}" == "gpu" && -z "${IS_WSL2}" ]]; then + case $distribution in + ubuntu2004) + sudo apt install -y nvidia-driver-470-server --no-install-recommends -o Dpkg::Options::="--force-overwrite" + ;; + ubuntu2204) + sudo apt install -y nvidia-driver-535-server --no-install-recommends -o Dpkg::Options::="--force-overwrite" + ;; + *) + echo "Unsupported distribution: $distribution" + exit 1 + ;; + esac fi -## Adding AWSCli -sudo apt-get install -y --no-install-recommends awscli python3-boto3 - ## Installing Docker -curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/trusted.gpg.d/docker.gpg -sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" -sudo apt-get update && sudo apt-get install -y --no-install-recommends docker-ce docker-ce-cli docker-buildx-plugin docker-compose-plugin containerd.io +sudo apt install -y --no-install-recommends docker.io docker-buildx docker-compose-v2 +## Install Nvidia Docker Container if [[ "${ARCH}" == "gpu" ]]; then - distribution=$( - . /etc/os-release - echo $ID$VERSION_ID - ) curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo gpg --dearmor -o /etc/apt/trusted.gpg.d/nvidia-docker.gpg curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list - sudo apt-get update && sudo apt-get install -y --no-install-recommends nvidia-docker2 nvidia-container-toolkit nvidia-container-runtime + sudo apt update && sudo apt install -y --no-install-recommends nvidia-docker2 nvidia-container-runtime if [ -f "/etc/docker/daemon.json" ]; then echo "Altering /etc/docker/daemon.json with default-rutime nvidia." cat /etc/docker/daemon.json | jq 'del(."default-runtime") + {"default-runtime": "nvidia"}' | sudo tee /etc/docker/daemon.json @@ -90,8 +69,14 @@ if [[ "${ARCH}" == "gpu" ]]; then sudo cp $DIR/../defaults/docker-daemon.json /etc/docker/daemon.json fi fi -sudo systemctl enable docker -sudo systemctl restart docker + +## Enable and start docker +if [[ -n "${IS_WSL2}" ]]; then + sudo service docker restart +else + sudo systemctl enable docker + sudo systemctl restart docker +fi ## Ensure user can run docker sudo usermod -a -G docker $(id -un) From f3eb856812e721f45e14d4b47d37245920019dd9 Mon Sep 17 00:00:00 2001 From: Lars Ludvigsen Date: Sun, 24 Mar 2024 13:58:12 +0100 Subject: [PATCH 02/10] Revert "New syntax for swarm" This reverts commit fa942ce4347c391f6b24ef891162c940e2d025d3. --- scripts/evaluation/start.sh | 2 +- scripts/training/start.sh | 2 +- scripts/viewer/start.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/evaluation/start.sh b/scripts/evaluation/start.sh index fc0d2175..8f1dc12d 100755 --- a/scripts/evaluation/start.sh +++ b/scripts/evaluation/start.sh @@ -93,7 +93,7 @@ fi # Check if we will use Docker Swarm or Docker Compose if [[ "${DR_DOCKER_STYLE,,}" == "swarm" ]]; then - DISPLAY=$ROBO_DISPLAY docker stack deploy -d $COMPOSE_FILES $STACK_NAME + DISPLAY=$ROBO_DISPLAY docker stack deploy $COMPOSE_FILES $STACK_NAME else DISPLAY=$ROBO_DISPLAY docker compose $COMPOSE_FILES -p $STACK_NAME up -d fi diff --git a/scripts/training/start.sh b/scripts/training/start.sh index 968dd23d..16984e6b 100755 --- a/scripts/training/start.sh +++ b/scripts/training/start.sh @@ -187,7 +187,7 @@ if [[ "${DR_DOCKER_STYLE,,}" == "swarm" ]]; then exit 1 fi - DISPLAY=$ROBO_DISPLAY docker stack deploy -d $COMPOSE_FILES $STACK_NAME + DISPLAY=$ROBO_DISPLAY docker stack deploy $COMPOSE_FILES $STACK_NAME else DISPLAY=$ROBO_DISPLAY docker compose $COMPOSE_FILES -p $STACK_NAME up -d --scale robomaker=$DR_WORKERS diff --git a/scripts/viewer/start.sh b/scripts/viewer/start.sh index 2ebde2a1..1f743171 100755 --- a/scripts/viewer/start.sh +++ b/scripts/viewer/start.sh @@ -110,7 +110,7 @@ COMPOSE_FILES=$DR_DIR/docker/docker-compose-webviewer.yml if [[ "${DR_DOCKER_STYLE,,}" == "swarm" ]]; then COMPOSE_FILES="$COMPOSE_FILES -c $DR_DIR/docker/docker-compose-webviewer-swarm.yml" - docker stack deploy -d -c $COMPOSE_FILES $STACK_NAME + docker stack deploy -c $COMPOSE_FILES $STACK_NAME else docker compose -f $COMPOSE_FILES -p $STACK_NAME up -d fi From c100fca1354573ba8c719bd2b6ad48ea99261f7e Mon Sep 17 00:00:00 2001 From: Lars Ludvigsen Date: Sun, 31 Mar 2024 20:07:29 +0200 Subject: [PATCH 03/10] Update libnvidia-container --- bin/prepare.sh | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/bin/prepare.sh b/bin/prepare.sh index d64f8e4c..c6d1f80b 100755 --- a/bin/prepare.sh +++ b/bin/prepare.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -e + trap ctrl_c INT function ctrl_c() { @@ -39,16 +41,16 @@ fi ## Adding Nvidia Drivers if [[ "${ARCH}" == "gpu" && -z "${IS_WSL2}" ]]; then case $distribution in - ubuntu2004) - sudo apt install -y nvidia-driver-470-server --no-install-recommends -o Dpkg::Options::="--force-overwrite" - ;; - ubuntu2204) - sudo apt install -y nvidia-driver-535-server --no-install-recommends -o Dpkg::Options::="--force-overwrite" - ;; - *) - echo "Unsupported distribution: $distribution" - exit 1 - ;; + ubuntu2004) + sudo apt install -y nvidia-driver-525-server --no-install-recommends -o Dpkg::Options::="--force-overwrite" + ;; + ubuntu2204) + sudo apt install -y nvidia-driver-535-server --no-install-recommends -o Dpkg::Options::="--force-overwrite" + ;; + *) + echo "Unsupported distribution: $distribution" + exit 1 + ;; esac fi @@ -57,8 +59,10 @@ sudo apt install -y --no-install-recommends docker.io docker-buildx docker-compo ## Install Nvidia Docker Container if [[ "${ARCH}" == "gpu" ]]; then - curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo gpg --dearmor -o /etc/apt/trusted.gpg.d/nvidia-docker.gpg - curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list + curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg && + curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | + sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | + sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list sudo apt update && sudo apt install -y --no-install-recommends nvidia-docker2 nvidia-container-runtime if [ -f "/etc/docker/daemon.json" ]; then From 321aeb9e53e741346b99afc722ff82a8fb7eb71f Mon Sep 17 00:00:00 2001 From: Lars Ludvigsen Date: Sun, 31 Mar 2024 20:19:17 +0200 Subject: [PATCH 04/10] Avoid prompts --- bin/prepare.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/prepare.sh b/bin/prepare.sh index c6d1f80b..06a8eb2e 100755 --- a/bin/prepare.sh +++ b/bin/prepare.sh @@ -9,10 +9,11 @@ function ctrl_c() { exit 1 } +export DEBIAN_FRONTEND=noninteractive DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" ## Patch system -sudo apt update && sudo apt-mark hold grub-pc && sudo DEBIAN_FRONTEND=noninteractive apt -y -o \ +sudo apt update && sudo apt-mark hold grub-pc && sudo apt -y -o \ DPkg::options::="--force-confdef" -o DPkg::options::="--force-confold" -qq --force-yes upgrade && sudo apt install --no-install-recommends -y jq awscli python3-boto3 source $DIR/detect.sh From 17e83ffefcd81f2d3a07df070c87f34c3b2ed249 Mon Sep 17 00:00:00 2001 From: Lars Ludvigsen Date: Sun, 31 Mar 2024 20:35:24 +0200 Subject: [PATCH 05/10] Sequence --- bin/prepare.sh | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/bin/prepare.sh b/bin/prepare.sh index 06a8eb2e..ef97e5b9 100755 --- a/bin/prepare.sh +++ b/bin/prepare.sh @@ -12,6 +12,22 @@ function ctrl_c() { export DEBIAN_FRONTEND=noninteractive DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" +## Check distribution +distribution=$( + . /etc/os-release + echo $ID$VERSION_ID | sed 's/\.//' +) + +## Check if WSL2 +if grep -q Microsoft /proc/version && grep -q "WSL2" /proc/version; then + IS_WSL2="yes" +fi + +## Remove needsreboot in Ubuntu 22.04 +if [[ "${distribution}" == "ubuntu2204" && -z "${IS_WSL2}" ]]; then + sudo apt remove -y needrestart +fi + ## Patch system sudo apt update && sudo apt-mark hold grub-pc && sudo apt -y -o \ DPkg::options::="--force-confdef" -o DPkg::options::="--force-confold" -qq --force-yes upgrade && @@ -28,17 +44,6 @@ else ARCH="gpu" fi -## Check distribution -distribution=$( - . /etc/os-release - echo $ID$VERSION_ID | sed 's/\.//' -) - -## Check if WSL2 -if grep -q Microsoft /proc/version && grep -q "WSL2" /proc/version; then - IS_WSL2="yes" -fi - ## Adding Nvidia Drivers if [[ "${ARCH}" == "gpu" && -z "${IS_WSL2}" ]]; then case $distribution in From d422bc3ff791daf1624a3925f9435a907d75ca7f Mon Sep 17 00:00:00 2001 From: Lars Ludvigsen Date: Sun, 31 Mar 2024 20:54:42 +0200 Subject: [PATCH 06/10] Syntax harmonize --- bin/init.sh | 2 ++ bin/prepare.sh | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/bin/init.sh b/bin/init.sh index 506c29de..b4057139 100755 --- a/bin/init.sh +++ b/bin/init.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +set -e + trap ctrl_c INT function ctrl_c() { diff --git a/bin/prepare.sh b/bin/prepare.sh index ef97e5b9..d80da634 100755 --- a/bin/prepare.sh +++ b/bin/prepare.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -e From d88e2dfb853155ec724062480f98f5d2ffaf7e3c Mon Sep 17 00:00:00 2001 From: Lars Ludvigsen Date: Mon, 1 Apr 2024 12:46:22 +0200 Subject: [PATCH 07/10] WSL2 compatible detection --- bin/prepare.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/bin/prepare.sh b/bin/prepare.sh index d80da634..eb5df160 100755 --- a/bin/prepare.sh +++ b/bin/prepare.sh @@ -36,7 +36,14 @@ source $DIR/detect.sh echo "Detected cloud type ${CLOUD_NAME}" ## Do I have a GPU -GPUS=$(lspci | awk '/NVIDIA/ && ( /VGA/ || /3D controller/ ) ' | wc -l) +GPUS=0 +if [[ -z "${IS_WSL2}" ]]; then + GPUS=$(lspci | awk '/NVIDIA/ && ( /VGA/ || /3D controller/ ) ' | wc -l) +else + if [[ -f /usr/lib/wsl/lib/nvidia-smi ]]; then + GPUS=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l) + fi +fi if [ $? -ne 0 ] || [ $GPUS -eq 0 ]; then ARCH="cpu" echo "No NVIDIA GPU detected. Will not install drivers." From cc5ea8c28492d06da5e0fa5145c26487f53963fc Mon Sep 17 00:00:00 2001 From: Lars Ludvigsen Date: Mon, 1 Apr 2024 12:49:48 +0200 Subject: [PATCH 08/10] Case insensitive Microsoft --- bin/prepare.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/prepare.sh b/bin/prepare.sh index eb5df160..c750b0ba 100755 --- a/bin/prepare.sh +++ b/bin/prepare.sh @@ -19,7 +19,7 @@ distribution=$( ) ## Check if WSL2 -if grep -q Microsoft /proc/version && grep -q "WSL2" /proc/version; then +if grep -qi Microsoft /proc/version && grep -q "WSL2" /proc/version; then IS_WSL2="yes" fi From f7dbf1dd3c179023db2701aa854bb921d25f56c4 Mon Sep 17 00:00:00 2001 From: Lars Ludvigsen Date: Mon, 1 Apr 2024 13:09:39 +0200 Subject: [PATCH 09/10] Remove set -e --- bin/init.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/bin/init.sh b/bin/init.sh index b4057139..506c29de 100755 --- a/bin/init.sh +++ b/bin/init.sh @@ -1,7 +1,5 @@ #!/usr/bin/env bash -set -e - trap ctrl_c INT function ctrl_c() { From 383b0eafb9de329e45ba13fb3b22555a7d54f246 Mon Sep 17 00:00:00 2001 From: Lars Ludvigsen Date: Mon, 1 Apr 2024 13:12:11 +0200 Subject: [PATCH 10/10] Tune final message --- bin/prepare.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bin/prepare.sh b/bin/prepare.sh index c750b0ba..e17c4062 100755 --- a/bin/prepare.sh +++ b/bin/prepare.sh @@ -101,12 +101,14 @@ sudo usermod -a -G docker $(id -un) ## Reboot to load driver -- continue install if in cloud-init CLOUD_INIT=$(pstree -s $BASHPID | awk /cloud-init/ | wc -l) -if [[ "$CLOUD_INIT" -ne 0 ]]; then +if [[ "${CLOUD_INIT}" -ne 0 ]]; then echo "Rebooting in 5 seconds. Will continue with install." cd $DIR ./runonce.sh "./init.sh -c ${CLOUD_NAME} -a ${ARCH}" sleep 5s sudo shutdown -r +1 +elif [[ -n "${IS_WSL2}" || "${ARCH}" == "cpu" ]]; then + echo "First stage done. Log out, then log back in and run init.sh -c ${CLOUD_NAME} -a ${ARCH}" else echo "First stage done. Please reboot and run init.sh -c ${CLOUD_NAME} -a ${ARCH}" fi