From 82dfb1a9fa677eadc5d46fd570ca261594819f53 Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Tue, 11 Feb 2020 13:07:13 +0300 Subject: [PATCH 1/3] updated CUDA to version 10 --- components/cuda/docker-compose.cuda.yml | 2 +- components/cuda/install.sh | 21 +++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/components/cuda/docker-compose.cuda.yml b/components/cuda/docker-compose.cuda.yml index 66445f12437c..6c1076bd83dc 100644 --- a/components/cuda/docker-compose.cuda.yml +++ b/components/cuda/docker-compose.cuda.yml @@ -15,4 +15,4 @@ services: environment: NVIDIA_VISIBLE_DEVICES: all NVIDIA_DRIVER_CAPABILITIES: compute,utility - NVIDIA_REQUIRE_CUDA: "cuda>=9.0" + NVIDIA_REQUIRE_CUDA: "cuda>=10.0 brand=tesla,driver>=384,driver<385 brand=tesla,driver>=410,driver<411" diff --git a/components/cuda/install.sh b/components/cuda/install.sh index 485ae989bdee..d80597d08c51 100755 --- a/components/cuda/install.sh +++ b/components/cuda/install.sh @@ -14,24 +14,25 @@ echo "$NVIDIA_GPGKEY_SUM cudasign.pub" | sha256sum -c --strict - && rm cudasign echo "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \ echo "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list -CUDA_VERSION=9.0.176 -NCCL_VERSION=2.1.15 -CUDNN_VERSION=7.6.2.24 -CUDA_PKG_VERSION="9-0=${CUDA_VERSION}-1" +CUDA_VERSION=10.0.130 +NCCL_VERSION=2.5.6 +CUDNN_VERSION=7.6.5.32 +CUDA_PKG_VERSION="10-0=$CUDA_VERSION-1" echo 'export PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}' >> ${HOME}/.bashrc echo 'export LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH}' >> ${HOME}/.bashrc apt-get update && apt-get install -y --no-install-recommends --allow-unauthenticated \ - libprotobuf-dev \ - libprotoc-dev \ - protobuf-compiler \ cuda-cudart-$CUDA_PKG_VERSION \ + cuda-compat-10-0 \ 
cuda-libraries-$CUDA_PKG_VERSION \ - libnccl2=$NCCL_VERSION-1+cuda9.0 \ - libcudnn7=$CUDNN_VERSION-1+cuda9.0 && \ - ln -s cuda-9.0 /usr/local/cuda && \ + cuda-nvtx-$CUDA_PKG_VERSION \ + libnccl2=$NCCL_VERSION-1+cuda10.0 \ + libcudnn7=$CUDNN_VERSION-1+cuda10.0 && \ + ln -s cuda-10.0 /usr/local/cuda && \ + apt-mark hold libnccl2 libcudnn7 && \ rm -rf /var/lib/apt/lists/* \ /etc/apt/sources.list.d/nvidia-ml.list /etc/apt/sources.list.d/cuda.list python3 -m pip uninstall -y tensorflow python3 -m pip install --no-cache-dir tensorflow-gpu==1.13.1 + From ed4202b9b529d674d09d972d8463629659d8f84a Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Wed, 12 Feb 2020 11:48:56 +0300 Subject: [PATCH 2/3] updated tensorflow --- components/cuda/install.sh | 2 +- cvat/requirements/base.txt | 2 +- utils/tfrecords/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/components/cuda/install.sh b/components/cuda/install.sh index d80597d08c51..58f99acff509 100755 --- a/components/cuda/install.sh +++ b/components/cuda/install.sh @@ -34,5 +34,5 @@ apt-get update && apt-get install -y --no-install-recommends --allow-unauthentic /etc/apt/sources.list.d/nvidia-ml.list /etc/apt/sources.list.d/cuda.list python3 -m pip uninstall -y tensorflow -python3 -m pip install --no-cache-dir tensorflow-gpu==1.13.1 +python3 -m pip install --no-cache-dir tensorflow-gpu==1.15.2 diff --git a/cvat/requirements/base.txt b/cvat/requirements/base.txt index 11d098ed9923..5da8c57e16a5 100644 --- a/cvat/requirements/base.txt +++ b/cvat/requirements/base.txt @@ -39,7 +39,7 @@ django-rest-auth[with_social]==0.9.5 cython==0.29.13 matplotlib==3.0.3 scikit-image==0.15.0 -tensorflow==1.13.1 +tensorflow==1.15.2 keras==2.2.5 opencv-python==4.1.0.25 h5py==2.9.0 diff --git a/utils/tfrecords/requirements.txt b/utils/tfrecords/requirements.txt index 616c04018e20..bb0070d81b13 100644 --- a/utils/tfrecords/requirements.txt +++ b/utils/tfrecords/requirements.txt @@ -1,3 +1,3 @@ argparse==1.1 
-tensorflow==1.13.1 +tensorflow==1.15.2 pathlib==1.0.1 From 304ffa09f67e80b8828787ed5e2edf79453da605 Mon Sep 17 00:00:00 2001 From: Andrey Zhavoronkov Date: Wed, 12 Feb 2020 12:38:41 +0300 Subject: [PATCH 3/3] added comment about NVIDIA_REQUIRE_CUDA env var --- components/cuda/docker-compose.cuda.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/components/cuda/docker-compose.cuda.yml b/components/cuda/docker-compose.cuda.yml index 6c1076bd83dc..41d325f3f2bf 100644 --- a/components/cuda/docker-compose.cuda.yml +++ b/components/cuda/docker-compose.cuda.yml @@ -15,4 +15,9 @@ services: environment: NVIDIA_VISIBLE_DEVICES: all NVIDIA_DRIVER_CAPABILITIES: compute,utility + # That environment variable is used by the Nvidia Container Runtime. + # The Nvidia Container Runtime parses this as: + # :space:: logical OR + # ,: Logical AND + # https://gitlab.com/nvidia/container-images/cuda/issues/31#note_149432780 NVIDIA_REQUIRE_CUDA: "cuda>=10.0 brand=tesla,driver>=384,driver<385 brand=tesla,driver>=410,driver<411"