Patch for bin/prepare.sh, the first-stage host preparation script.

What the hunks below change:

Startup: the shebang becomes `/usr/bin/env bash`, `set -e` is enabled, and
DEBIAN_FRONTEND=noninteractive is exported once instead of being set inline
on the upgrade command.

Platform detection: `distribution` is derived from /etc/os-release near the
top of the script, and IS_WSL2 is set when /proc/version mentions both
Microsoft and WSL2. On ubuntu2204 outside WSL2 the needrestart package is
removed before the system is patched.

Packages: apt-get calls become apt; jq, awscli and python3-boto3 are
installed in one step.

GPU detection: GPUS defaults to 0; lspci is used outside WSL2, and on WSL2
nvidia-smi is queried only when /usr/lib/wsl/lib/nvidia-smi exists. The probe
is invoked by that same absolute path so the count does not depend on PATH.

Drivers: outside WSL2, nvidia-driver-525-server is installed on ubuntu2004
and nvidia-driver-535-server on ubuntu2204; any other distribution exits 1.
The CUDA / machine-learning repository setup is removed.

Docker: docker.io, docker-buildx and docker-compose-v2 replace the docker-ce
packages and their repository. The libnvidia-container repository (with a
signed-by keyring) replaces the nvidia-docker repository. On WSL2 docker is
restarted with `service`; elsewhere it is enabled and restarted via systemctl.

Removed: the Azure additional-disk (/dev/sdc) preparation.

Final message: outside cloud-init, WSL2 or cpu hosts are told to log out and
back in instead of rebooting.

Note: the index line is kept as first generated; its post-image id predates
the needrestart comment wording and the absolute nvidia-smi path used here.

diff --git a/bin/prepare.sh b/bin/prepare.sh
index eb2975e9..e17c4062 100755
--- a/bin/prepare.sh
+++ b/bin/prepare.sh
@@ -1,4 +1,6 @@
-#!/bin/bash
+#!/usr/bin/env bash
+
+set -e
 
 trap ctrl_c INT
 
@@ -7,17 +9,41 @@ function ctrl_c() {
     exit 1
 }
 
+export DEBIAN_FRONTEND=noninteractive
 DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
 
+## Check distribution
+distribution=$(
+    . /etc/os-release
+    echo $ID$VERSION_ID | sed 's/\.//'
+)
+
+## Check if WSL2
+if grep -qi Microsoft /proc/version && grep -q "WSL2" /proc/version; then
+    IS_WSL2="yes"
+fi
+
+## Remove needrestart in Ubuntu 22.04
+if [[ "${distribution}" == "ubuntu2204" && -z "${IS_WSL2}" ]]; then
+    sudo apt remove -y needrestart
+fi
+
 ## Patch system
-sudo apt-get update && sudo apt-mark hold grub-pc && sudo DEBIAN_FRONTEND=noninteractive apt-get -y -o \
+sudo apt update && sudo apt-mark hold grub-pc && sudo apt -y -o \
     DPkg::options::="--force-confdef" -o DPkg::options::="--force-confold" -qq --force-yes upgrade &&
-    sudo apt-get install --no-install-recommends -y jq
+    sudo apt install --no-install-recommends -y jq awscli python3-boto3
 source $DIR/detect.sh
 echo "Detected cloud type ${CLOUD_NAME}"
 
 ## Do I have a GPU
-GPUS=$(lspci | awk '/NVIDIA/ && ( /VGA/ || /3D controller/ ) ' | wc -l)
+GPUS=0
+if [[ -z "${IS_WSL2}" ]]; then
+    GPUS=$(lspci | awk '/NVIDIA/ && ( /VGA/ || /3D controller/ ) ' | wc -l)
+else
+    if [[ -f /usr/lib/wsl/lib/nvidia-smi ]]; then
+        GPUS=$(/usr/lib/wsl/lib/nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)
+    fi
+fi
 if [ $? -ne 0 ] || [ $GPUS -eq 0 ]; then
     ARCH="cpu"
     echo "No NVIDIA GPU detected. Will not install drivers."
@@ -25,63 +51,33 @@ else
     ARCH="gpu"
 fi
 
-## Do I have an additional disk for Docker images - looking for /dev/sdc (Azure)
-
-if [[ "${CLOUD_NAME}" == "azure" ]]; then
-    ADDL_DISK=$(lsblk | awk '/^sdc/ {print $1}')
-    ADDL_PART=$(lsblk -l | awk -v DISK="$ADDL_DISK" '($0 ~ DISK) && ($0 ~ /part/) {print $1}')
-
-    if [ -n "$ADDL_DISK" ] && [ -z "$ADDL_PART" ]; then
-        echo "Found $ADDL_DISK, preparing it for use"
-        echo -e "g\nn\np\n1\n\n\nw\n" | sudo fdisk /dev/$ADDL_DISK
-        sleep 1s
-        ADDL_DEVICE=$(echo "/dev/"$ADDL_DISK"1")
-        sudo mkfs.ext4 $ADDL_DEVICE
-        sudo mkdir -p /var/lib/docker
-        echo "$ADDL_DEVICE /var/lib/docker ext4 rw,user,auto 0 0" | sudo tee -a /etc/fstab
-        mount /var/lib/docker
-        if [ $? -ne 0 ]; then
-            echo "Error during preparing of additional disk. Exiting."
-            exit 1
-        fi
-    elif [ -n "$ADDL_DISK" ] && [ -n "$ADDL_PART" ]; then
-        echo "Found $ADDL_DISK - $ADDL_PART already mounted. Installing into present drive/directory structure."
-
-    else
-        echo "Did not find $ADDL_DISK. Installing into present drive/directory structure."
-    fi
-fi
-
 ## Adding Nvidia Drivers
-if [[ "${ARCH}" == "gpu" ]]; then
-    distribution=$(
-        . /etc/os-release
-        echo $ID$VERSION_ID | sed 's/\.//'
-    )
-    curl -sS https://developer.download.nvidia.com/compute/cuda/repos/$distribution/x86_64/3bf863cc.pub | sudo gpg --dearmour -o /etc/apt/trusted.gpg.d/nvidia-cuda.gpg
-    curl -sS https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64/7fa2af80.pub | sudo gpg --dearmour -o /etc/apt/trusted.gpg.d/nvidia-machine-learning.gpg
-    echo "deb http://developer.download.nvidia.com/compute/cuda/repos/$distribution/x86_64 /" | sudo tee /etc/apt/sources.list.d/cuda.list
-    echo "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64 /" | sudo tee /etc/apt/sources.list.d/cuda_learn.list
-    sudo apt update && sudo apt install -y nvidia-driver-470-server cuda-minimal-build-11-4 --no-install-recommends -o Dpkg::Options::="--force-overwrite"
+if [[ "${ARCH}" == "gpu" && -z "${IS_WSL2}" ]]; then
+    case $distribution in
+    ubuntu2004)
+        sudo apt install -y nvidia-driver-525-server --no-install-recommends -o Dpkg::Options::="--force-overwrite"
+        ;;
+    ubuntu2204)
+        sudo apt install -y nvidia-driver-535-server --no-install-recommends -o Dpkg::Options::="--force-overwrite"
+        ;;
+    *)
+        echo "Unsupported distribution: $distribution"
+        exit 1
+        ;;
+    esac
 fi
 
-## Adding AWSCli
-sudo apt-get install -y --no-install-recommends awscli python3-boto3
-
 ## Installing Docker
-curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/trusted.gpg.d/docker.gpg
-sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
-sudo apt-get update && sudo apt-get install -y --no-install-recommends docker-ce docker-ce-cli docker-buildx-plugin docker-compose-plugin containerd.io
+sudo apt install -y --no-install-recommends docker.io docker-buildx docker-compose-v2
 
+## Install Nvidia Docker Container
 if [[ "${ARCH}" == "gpu" ]]; then
-    distribution=$(
-        . /etc/os-release
-        echo $ID$VERSION_ID
-    )
-    curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo gpg --dearmor -o /etc/apt/trusted.gpg.d/nvidia-docker.gpg
-    curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
-
-    sudo apt-get update && sudo apt-get install -y --no-install-recommends nvidia-docker2 nvidia-container-toolkit nvidia-container-runtime
+    curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg &&
+        curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list |
+        sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' |
+            sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
+
+    sudo apt update && sudo apt install -y --no-install-recommends nvidia-docker2 nvidia-container-runtime
     if [ -f "/etc/docker/daemon.json" ]; then
         echo "Altering /etc/docker/daemon.json with default-rutime nvidia."
         cat /etc/docker/daemon.json | jq 'del(."default-runtime") + {"default-runtime": "nvidia"}' | sudo tee /etc/docker/daemon.json
@@ -90,8 +86,14 @@ if [[ "${ARCH}" == "gpu" ]]; then
         sudo cp $DIR/../defaults/docker-daemon.json /etc/docker/daemon.json
     fi
 fi
-sudo systemctl enable docker
-sudo systemctl restart docker
+
+## Enable and start docker
+if [[ -n "${IS_WSL2}" ]]; then
+    sudo service docker restart
+else
+    sudo systemctl enable docker
+    sudo systemctl restart docker
+fi
 
 ## Ensure user can run docker
 sudo usermod -a -G docker $(id -un)
@@ -99,12 +101,14 @@ sudo usermod -a -G docker $(id -un)
 ## Reboot to load driver -- continue install if in cloud-init
 CLOUD_INIT=$(pstree -s $BASHPID | awk /cloud-init/ | wc -l)
 
-if [[ "$CLOUD_INIT" -ne 0 ]]; then
+if [[ "${CLOUD_INIT}" -ne 0 ]]; then
     echo "Rebooting in 5 seconds. Will continue with install."
     cd $DIR
     ./runonce.sh "./init.sh -c ${CLOUD_NAME} -a ${ARCH}"
     sleep 5s
     sudo shutdown -r +1
+elif [[ -n "${IS_WSL2}" || "${ARCH}" == "cpu" ]]; then
+    echo "First stage done. Log out, then log back in and run init.sh -c ${CLOUD_NAME} -a ${ARCH}"
 else
     echo "First stage done. Please reboot and run init.sh -c ${CLOUD_NAME} -a ${ARCH}"
 fi