From 0786e08e1a7fbaf716acbc2c8605051be3ef1c8d Mon Sep 17 00:00:00 2001 From: Anda Zhou <83614683+azhou-determined@users.noreply.github.com> Date: Tue, 16 Jul 2024 13:32:29 -0700 Subject: [PATCH] chore: bumpenvs for jupyter upgrades [MD-242] (#9660) --- .circleci/real_config.yml | 14 ++-- .circleci/scripts/pull_image_daemonset.yaml | 2 +- .../prepare-container/custom-env.rst | 8 +-- .../prepare-container/tensorflow-support.rst | 2 +- .../deploy/helm-config-reference.rst | 4 +- .../deploy/master-config-reference.rst | 4 +- .../reference/experiment-config-reference.rst | 4 +- docs/reference/job-config-reference.rst | 4 +- .../deploy-cluster/slurm/singularity.rst | 4 +- docs/setup-cluster/gcp/install-gcp.rst | 4 +- docs/setup-cluster/slurm/singularity.rst | 4 +- .../slurm/slurm-requirements.rst | 2 +- e2e_tests/tests/config.py | 12 ++-- .../tests/fixtures/ports-proxy/config.yaml | 2 +- .../iris_tf_keras/adaptive.yaml | 4 +- .../computer_vision/iris_tf_keras/const.yaml | 4 +- .../iris_tf_keras/distributed.yaml | 4 +- .../iris_tf_keras_cancelable.yaml | 4 +- .../iris_tf_keras_high_priority.yaml | 4 +- .../torchvision/core_api/deepspeed.yaml | 2 +- .../deepspeed_trial/deepspeed.yaml | 2 +- .../hf_image_classification/deepspeed.yaml | 2 +- .../hf_language_modeling/deepspeed.yaml | 2 +- .../determined/deploy/aws/templates/efs.yaml | 20 +++--- .../determined/deploy/aws/templates/fsx.yaml | 20 +++--- .../deploy/aws/templates/govcloud.yaml | 4 +- .../determined/deploy/aws/templates/lore.yaml | 20 +++--- .../deploy/aws/templates/secure.yaml | 20 +++--- .../deploy/aws/templates/simple-rds.yaml | 20 +++--- .../deploy/aws/templates/simple.yaml | 20 +++--- harness/determined/deploy/gcp/constants.py | 2 +- .../0.17.6-keras/metadata.json | 4 +- .../0.17.6-pytorch/metadata.json | 4 +- harness/tests/fixtures/checkpoint.json | 4 +- helm/charts/determined/values.yaml | 4 +- .../internal/config/provconfig/aws_config.go | 20 +++--- .../internal/config/provconfig/gcp_config.go | 2 +- master/pkg/schemas/expconf/const.go | 4 +- model_hub/Makefile | 2 +- schemas/test_cases/v0/experiment.yaml | 4 +- tools/scripts/bumpenvs.yaml | 66 +++++++++---------- tools/scripts/environments-target.txt | 2 +- .../non-scalar-metrics-4078.json | 4 +- .../responses/experiment-details/set-a.json | 8 +-- .../old-trial-config-noop-adaptive.json | 4 +- 45 files changed, 178 insertions(+), 178 deletions(-) diff --git a/.circleci/real_config.yml b/.circleci/real_config.yml index d0e1033bf04..483bc23272d 100644 --- a/.circleci/real_config.yml +++ b/.circleci/real_config.yml @@ -282,7 +282,7 @@ commands: - when: condition: <> steps: - - run: docker pull determinedai/pytorch-ngc-dev:0e43056 + - run: docker pull determinedai/pytorch-ngc-dev:f20b027 login-docker: parameters: @@ -2365,7 +2365,7 @@ jobs: test-unit-harness-gpu-tf: docker: - - image: determinedai/tensorflow-ngc-dev:0e43056 + - image: determinedai/tensorflow-ngc-dev:f20b027 resource_class: determined-ai/container-runner-gpu steps: - run: mkdir -p ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts @@ -2392,7 +2392,7 @@ jobs: test-unit-harness-pytorch2-gpu: docker: - - image: determinedai/pytorch-ngc-dev:0e43056 + - image: determinedai/pytorch-ngc-dev:f20b027 resource_class: determined-ai/container-runner-gpu steps: - run: mkdir -p ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts @@ -2419,7 +2419,7 @@ jobs: test-unit-harness-pytorch2-cpu: docker: - - image: determinedai/pytorch-ngc-dev:0e43056 + - image: determinedai/pytorch-ngc-dev:f20b027 steps: - run: mkdir -p ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts - checkout @@ -2445,7 +2445,7 @@ jobs: test-unit-harness-gpu-parallel: docker: - - image: determinedai/pytorch-ngc-dev:0e43056 + - image: determinedai/pytorch-ngc-dev:f20b027 resource_class: determined-ai/container-runner-multi-gpu steps: - run: mkdir -p ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts @@ -2472,7 +2472,7 @@ jobs: test-unit-harness-gpu-deepspeed: docker: - - image: determinedai/pytorch-ngc-dev:0e43056 + - image: determinedai/pytorch-ngc-dev:f20b027 resource_class: determined-ai/container-runner-gpu steps: - run: mkdir -p ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts @@ -3573,7 +3573,7 @@ jobs: type: string default: "1" environment-image: - default: determinedai/pytorch-ngc-dev:0e43056 + default: determinedai/pytorch-ngc-dev:f20b027 type: string accel-node-taints: type: string diff --git a/.circleci/scripts/pull_image_daemonset.yaml b/.circleci/scripts/pull_image_daemonset.yaml index bb0c9a25eeb..b0610ca374c 100644 --- a/.circleci/scripts/pull_image_daemonset.yaml +++ b/.circleci/scripts/pull_image_daemonset.yaml @@ -13,7 +13,7 @@ spec: spec: containers: - name: pull-docker-daemonset - image: determinedai/pytorch-ngc-dev:0e43056 + image: determinedai/pytorch-ngc-dev:f20b027 command: ["/bin/bash"] args: ["echo", "test"] resources: diff --git a/docs/model-dev-guide/prepare-container/custom-env.rst b/docs/model-dev-guide/prepare-container/custom-env.rst index c255e5574ac..bf0e8d62a4b 100644 --- a/docs/model-dev-guide/prepare-container/custom-env.rst +++ b/docs/model-dev-guide/prepare-container/custom-env.rst @@ -114,9 +114,9 @@ Default Images - - Environment - File Name - - CPUs - - ``determinedai/pytorch-ngc-dev:0e43056`` + - ``determinedai/pytorch-ngc-dev:f20b027`` - - NVIDIA GPUs - - ``determinedai/pytorch-ngc-dev:0e43056`` + - ``determinedai/pytorch-ngc-dev:f20b027`` - - AMD GPUs - ``determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-0.26.4`` @@ -155,7 +155,7 @@ Example Dockerfile that installs custom ``conda``-, ``pip``-, and ``apt``-based .. code:: bash # Determined Image - FROM determinedai/tensorflow-ngc-dev:0e43056 + FROM determinedai/tensorflow-ngc-dev:f20b027 # Custom Configuration RUN apt-get update && \ @@ -216,7 +216,7 @@ environments using :ref:`custom images `: .. code:: bash # Determined Image - FROM determinedai/pytorch-ngc-dev:0e43056 + FROM determinedai/pytorch-ngc-dev:f20b027 # Create a virtual environment RUN conda create -n myenv python=3.8 diff --git a/docs/model-dev-guide/prepare-container/tensorflow-support.rst b/docs/model-dev-guide/prepare-container/tensorflow-support.rst index ad67093e7d7..b26a016ae6f 100644 --- a/docs/model-dev-guide/prepare-container/tensorflow-support.rst +++ b/docs/model-dev-guide/prepare-container/tensorflow-support.rst @@ -20,7 +20,7 @@ Determined supports both TensorFlow 1 and 2. The version of TensorFlow used for experiment is controlled by the configured container image. Determined provides prebuilt Docker images that include TensorFlow 2+, 1.15, and 2.8, respectively: -- ``determinedai/tensorflow-ngc-dev:0e43056`` +- ``determinedai/tensorflow-ngc-dev:f20b027`` - ``determinedai/environments:cuda-10.2-pytorch-1.7-tf-1.15-gpu-0.21.2`` - ``determinedai/environments:cuda-11.2-tf-2.8-gpu-0.29.1`` diff --git a/docs/reference/deploy/helm-config-reference.rst b/docs/reference/deploy/helm-config-reference.rst index f52f4d4f42f..536a2ea3f1c 100644 --- a/docs/reference/deploy/helm-config-reference.rst +++ b/docs/reference/deploy/helm-config-reference.rst @@ -197,13 +197,13 @@ - ``cpuImage``: Sets the default Docker image for all non-GPU tasks. If a Docker image is specified in the :ref:`experiment config ` this default is overriden. - Defaults to: ``determinedai/pytorch-ngc-dev:0e43056``. + Defaults to: ``determinedai/pytorch-ngc-dev:f20b027``. - ``startupHook``: An optional inline script that will be executed as part of task set up. - ``gpuImage``: Sets the default Docker image for all GPU tasks. If a Docker image is specified in the :ref:`experiment config ` this default is overriden. Defaults - to: ``determinedai/pytorch-ngc-dev:0e43056``. + to: ``determinedai/pytorch-ngc-dev:f20b027``. - ``logPolicies``: Sets log policies for trials. For details, visit :ref:`log_policies `. diff --git a/docs/reference/deploy/master-config-reference.rst b/docs/reference/deploy/master-config-reference.rst index 8e30d76d7d0..24b9047d38c 100644 --- a/docs/reference/deploy/master-config-reference.rst +++ b/docs/reference/deploy/master-config-reference.rst @@ -89,12 +89,12 @@ configure different container images for NVIDIA GPU tasks using the ``cuda`` key Determined 0.17.6), CPU tasks using ``cpu`` key, and ROCm (AMD GPU) tasks using the ``rocm`` key. Default values: -- ``determinedai/pytorch-ngc-dev:0e43056`` for NVIDIA GPUs and for CPUs. +- ``determinedai/pytorch-ngc-dev:f20b027`` for NVIDIA GPUs and for CPUs. - ``determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-0.26.4`` for ROCm. For TensorFlow users, we provide an image that must be referenced in the experiment configuration: -- ``determinedai/tensorflow-ngc-dev:0e43056`` for NVIDIA GPUs and for CPUs. +- ``determinedai/tensorflow-ngc-dev:f20b027`` for NVIDIA GPUs and for CPUs. ``environment_variables`` ========================= diff --git a/docs/reference/experiment-config-reference.rst b/docs/reference/experiment-config-reference.rst index 18bc552ce89..ed02bfddf3a 100644 --- a/docs/reference/experiment-config-reference.rst +++ b/docs/reference/experiment-config-reference.rst @@ -1335,12 +1335,12 @@ Optional. The Docker image to use when executing the workload. This image must b container images for NVIDIA GPU tasks using ``cuda`` key (``gpu`` prior to 0.17.6), CPU tasks using ``cpu`` key, and ROCm (AMD GPU) tasks using ``rocm`` key. Default values: -- ``determinedai/pytorch-ngc-dev:0e43056`` for NVIDIA GPUs and for CPUs. +- ``determinedai/pytorch-ngc-dev:f20b027`` for NVIDIA GPUs and for CPUs. - ``determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-0.26.4`` for ROCm. For TensorFlow users, we provide an image that must be referenced in the experiment configuration: -- ``determinedai/tensorflow-ngc-dev:0e43056`` for NVIDIA GPUs and for CPUs. +- ``determinedai/tensorflow-ngc-dev:f20b027`` for NVIDIA GPUs and for CPUs. When the cluster is configured with :ref:`resource_manager.type: slurm ` and ``container_run_type: singularity``, images are executed using diff --git a/docs/reference/job-config-reference.rst b/docs/reference/job-config-reference.rst index 1370615974d..e17071f93c6 100644 --- a/docs/reference/job-config-reference.rst +++ b/docs/reference/job-config-reference.rst @@ -45,13 +45,13 @@ The following configuration settings are supported: different container images for NVIDIA GPU tasks using ``cuda`` key (``gpu`` prior to 0.17.6), CPU tasks using ``cpu`` key, and ROCm (AMD GPU) tasks using ``rocm`` key. Default values: - - ``determinedai/pytorch-ngc-dev:0e43056`` for NVIDIA GPUs and for CPUs. + - ``determinedai/pytorch-ngc-dev:f20b027`` for NVIDIA GPUs and for CPUs. - ``determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-0.26.4`` for ROCm. For TensorFlow users, we provide an image that must be referenced in the experiment configuration: - - ``determinedai/tensorflow-ngc-dev:0e43056`` for NVIDIA GPUs and for CPUs. + - ``determinedai/tensorflow-ngc-dev:f20b027`` for NVIDIA GPUs and for CPUs. - ``force_pull_image``: Forcibly pull the image from the Docker registry and bypass the Docker cache. Defaults to ``false``. diff --git a/docs/setup-cluster/deploy-cluster/slurm/singularity.rst b/docs/setup-cluster/deploy-cluster/slurm/singularity.rst index 1ebae5767cb..a033f7b1778 100644 --- a/docs/setup-cluster/deploy-cluster/slurm/singularity.rst +++ b/docs/setup-cluster/deploy-cluster/slurm/singularity.rst @@ -30,9 +30,9 @@ by default in this version of Determined are described below. - - Environment - File Name - - CPUs - - ``determinedai/pytorch-ngc-dev:0e43056`` + - ``determinedai/pytorch-ngc-dev:f20b027`` - - NVIDIA GPUs - - ``determinedai/pytorch-ngc-dev:0e43056`` + - ``determinedai/pytorch-ngc-dev:f20b027`` - - AMD GPUs - ``determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-622d512`` diff --git a/docs/setup-cluster/gcp/install-gcp.rst b/docs/setup-cluster/gcp/install-gcp.rst index 1a04fcd868f..e7505ec7001 100644 --- a/docs/setup-cluster/gcp/install-gcp.rst +++ b/docs/setup-cluster/gcp/install-gcp.rst @@ -406,5 +406,5 @@ This command line will spin up a cluster of up to 2 A100s in the ``us-central1-c --compute-agent-instance-type a2-highgpu-1g --gpu-num 1 \ --gpu-type nvidia-tesla-a100 \ --region us-central1 --zone us-central1-c \ - --gpu-env-image determinedai/pytorch-ngc-dev:0e43056 \ - --cpu-env-image determinedai/pytorch-ngc-dev:0e43056 + --gpu-env-image determinedai/pytorch-ngc-dev:f20b027 \ + --cpu-env-image determinedai/pytorch-ngc-dev:f20b027 diff --git a/docs/setup-cluster/slurm/singularity.rst b/docs/setup-cluster/slurm/singularity.rst index 62586b774d4..619529cb1e6 100644 --- a/docs/setup-cluster/slurm/singularity.rst +++ b/docs/setup-cluster/slurm/singularity.rst @@ -30,9 +30,9 @@ by default in this version of Determined are described below. - - Environment - File Name - - CPUs - - ``determinedai/pytorch-ngc-dev:0e43056`` + - ``determinedai/pytorch-ngc-dev:f20b027`` - - NVIDIA GPUs - - ``determinedai/pytorch-ngc-dev:0e43056`` + - ``determinedai/pytorch-ngc-dev:f20b027`` - - AMD GPUs - ``determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-622d512`` diff --git a/docs/setup-cluster/slurm/slurm-requirements.rst b/docs/setup-cluster/slurm/slurm-requirements.rst index 6b2502a851c..5e820a12aad 100644 --- a/docs/setup-cluster/slurm/slurm-requirements.rst +++ b/docs/setup-cluster/slurm/slurm-requirements.rst @@ -510,7 +510,7 @@ platform. There may be additional per-user configuration that is required. .. code:: bash - image=determinedai/pytorch-ngc-dev:0e43056 + image=determinedai/pytorch-ngc-dev:f20b027 cd /shared/enroot/images enroot import docker://$image enroot create /shared/enroot/images/${image//[\/:]/\+}.sqsh diff --git a/e2e_tests/tests/config.py b/e2e_tests/tests/config.py index 93b306a9957..c20bedcd775 100644 --- a/e2e_tests/tests/config.py +++ b/e2e_tests/tests/config.py @@ -14,12 +14,12 @@ MAX_TRIAL_BUILD_SECS = 90 -DEFAULT_TF2_CPU_IMAGE = "determinedai/tensorflow-ngc-dev:0e43056" -DEFAULT_TF2_GPU_IMAGE = "determinedai/tensorflow-ngc-dev:0e43056" -DEFAULT_PT_CPU_IMAGE = "determinedai/pytorch-tensorflow-cpu-dev:0e43056" -DEFAULT_PT_GPU_IMAGE = "determinedai/pytorch-tensorflow-cuda-dev:0e43056" -DEFAULT_PT2_CPU_IMAGE = "determinedai/pytorch-ngc-dev:0e43056" -DEFAULT_PT2_GPU_IMAGE = "determinedai/pytorch-ngc-dev:0e43056" +DEFAULT_TF2_CPU_IMAGE = "determinedai/tensorflow-ngc-dev:f20b027" +DEFAULT_TF2_GPU_IMAGE = "determinedai/tensorflow-ngc-dev:f20b027" +DEFAULT_PT_CPU_IMAGE = "determinedai/pytorch-tensorflow-cpu-dev:f20b027" +DEFAULT_PT_GPU_IMAGE = "determinedai/pytorch-tensorflow-cuda-dev:f20b027" +DEFAULT_PT2_CPU_IMAGE = "determinedai/pytorch-ngc-dev:f20b027" +DEFAULT_PT2_GPU_IMAGE = "determinedai/pytorch-ngc-dev:f20b027" TF2_CPU_IMAGE = os.environ.get("TF2_CPU_IMAGE") or DEFAULT_TF2_CPU_IMAGE TF2_GPU_IMAGE = os.environ.get("TF2_GPU_IMAGE") or DEFAULT_TF2_GPU_IMAGE diff --git a/e2e_tests/tests/fixtures/ports-proxy/config.yaml b/e2e_tests/tests/fixtures/ports-proxy/config.yaml index e104ad0f737..b540c13c9f5 100644 --- a/e2e_tests/tests/fixtures/ports-proxy/config.yaml +++ b/e2e_tests/tests/fixtures/ports-proxy/config.yaml @@ -23,7 +23,7 @@ max_restarts: 0 # Hardcode the image because the new image has a bug. TODO fix this when the image bug is fixed. environment: - image: determinedai/pytorch-tensorflow-cpu-dev:0e43056 + image: determinedai/pytorch-tensorflow-cpu-dev:f20b027 proxy_ports: - proxy_port: 8000 proxy_tcp: false diff --git a/examples/computer_vision/iris_tf_keras/adaptive.yaml b/examples/computer_vision/iris_tf_keras/adaptive.yaml index 64ede8131b4..25429656ad6 100644 --- a/examples/computer_vision/iris_tf_keras/adaptive.yaml +++ b/examples/computer_vision/iris_tf_keras/adaptive.yaml @@ -4,8 +4,8 @@ data: test_url: http://download.tensorflow.org/data/iris_test.csv environment: image: - cpu: determinedai/tensorflow-ngc-dev:0e43056 - gpu: determinedai/tensorflow-ngc-dev:0e43056 + cpu: determinedai/tensorflow-ngc-dev:f20b027 + gpu: determinedai/tensorflow-ngc-dev:f20b027 hyperparameters: learning_rate: type: log diff --git a/examples/computer_vision/iris_tf_keras/const.yaml b/examples/computer_vision/iris_tf_keras/const.yaml index 3a4660fee70..11c424eaff7 100644 --- a/examples/computer_vision/iris_tf_keras/const.yaml +++ b/examples/computer_vision/iris_tf_keras/const.yaml @@ -4,8 +4,8 @@ data: test_url: http://download.tensorflow.org/data/iris_test.csv environment: image: - cpu: determinedai/tensorflow-ngc-dev:0e43056 - gpu: determinedai/tensorflow-ngc-dev:0e43056 + cpu: determinedai/tensorflow-ngc-dev:f20b027 + gpu: determinedai/tensorflow-ngc-dev:f20b027 hyperparameters: learning_rate: 1.0e-4 learning_rate_decay: 1.0e-6 diff --git a/examples/computer_vision/iris_tf_keras/distributed.yaml b/examples/computer_vision/iris_tf_keras/distributed.yaml index 39223b576d8..0f488f5ccdb 100644 --- a/examples/computer_vision/iris_tf_keras/distributed.yaml +++ b/examples/computer_vision/iris_tf_keras/distributed.yaml @@ -4,8 +4,8 @@ data: test_url: http://download.tensorflow.org/data/iris_test.csv environment: image: - cpu: determinedai/tensorflow-ngc-dev:0e43056 - gpu: determinedai/tensorflow-ngc-dev:0e43056 + cpu: determinedai/tensorflow-ngc-dev:f20b027 + gpu: determinedai/tensorflow-ngc-dev:f20b027 hyperparameters: learning_rate: 1.0e-4 learning_rate_decay: 1.0e-6 diff --git a/examples/computer_vision/iris_tf_keras/iris_tf_keras_cancelable.yaml b/examples/computer_vision/iris_tf_keras/iris_tf_keras_cancelable.yaml index 7ccdf2600ab..0330f11aa11 100644 --- a/examples/computer_vision/iris_tf_keras/iris_tf_keras_cancelable.yaml +++ b/examples/computer_vision/iris_tf_keras/iris_tf_keras_cancelable.yaml @@ -4,8 +4,8 @@ data: test_url: http://download.tensorflow.org/data/iris_test.csv environment: image: - cpu: determinedai/tensorflow-ngc-dev:0e43056 - gpu: determinedai/tensorflow-ngc-dev:0e43056 + cpu: determinedai/tensorflow-ngc-dev:f20b027 + gpu: determinedai/tensorflow-ngc-dev:f20b027 resources: slots_per_trial: 8 resource_pool: defq_GPU_cancelable diff --git a/examples/computer_vision/iris_tf_keras/iris_tf_keras_high_priority.yaml b/examples/computer_vision/iris_tf_keras/iris_tf_keras_high_priority.yaml index 49cd69cf868..330314afb93 100644 --- a/examples/computer_vision/iris_tf_keras/iris_tf_keras_high_priority.yaml +++ b/examples/computer_vision/iris_tf_keras/iris_tf_keras_high_priority.yaml @@ -4,8 +4,8 @@ data: test_url: http://download.tensorflow.org/data/iris_test.csv environment: image: - cpu: determinedai/tensorflow-ngc-dev:0e43056 - gpu: determinedai/tensorflow-ngc-dev:0e43056 + cpu: determinedai/tensorflow-ngc-dev:f20b027 + gpu: determinedai/tensorflow-ngc-dev:f20b027 resources: slots_per_trial: 8 resource_pool: defq_GPU_hipri diff --git a/examples/deepspeed_autotune/torchvision/core_api/deepspeed.yaml b/examples/deepspeed_autotune/torchvision/core_api/deepspeed.yaml index 9f2fb93c953..aca633627ce 100644 --- a/examples/deepspeed_autotune/torchvision/core_api/deepspeed.yaml +++ b/examples/deepspeed_autotune/torchvision/core_api/deepspeed.yaml @@ -2,7 +2,7 @@ name: torchvision dsat core_api max_restarts: 0 environment: image: - gpu: determinedai/pytorch-ngc-dev:0e43056 + gpu: determinedai/pytorch-ngc-dev:f20b027 resources: slots_per_trial: 2 shm_size: 4294967296 # 4 GiB. diff --git a/examples/deepspeed_autotune/torchvision/deepspeed_trial/deepspeed.yaml b/examples/deepspeed_autotune/torchvision/deepspeed_trial/deepspeed.yaml index dfb7af55416..d12756bd9ff 100644 --- a/examples/deepspeed_autotune/torchvision/deepspeed_trial/deepspeed.yaml +++ b/examples/deepspeed_autotune/torchvision/deepspeed_trial/deepspeed.yaml @@ -2,7 +2,7 @@ name: torchvision dsat deepspeed_trial max_restarts: 0 environment: image: - gpu: determinedai/pytorch-ngc-dev:0e43056 + gpu: determinedai/pytorch-ngc-dev:f20b027 resources: slots_per_trial: 2 shm_size: 4294967296 # 4 GiB. diff --git a/examples/hf_trainer_api/hf_image_classification/deepspeed.yaml b/examples/hf_trainer_api/hf_image_classification/deepspeed.yaml index b9fca49c608..c9d8f553b46 100644 --- a/examples/hf_trainer_api/hf_image_classification/deepspeed.yaml +++ b/examples/hf_trainer_api/hf_image_classification/deepspeed.yaml @@ -6,7 +6,7 @@ environment: # You may need to modify this to match your network configuration. - NCCL_SOCKET_IFNAME=ens,eth,ib image: - gpu: determinedai/pytorch-ngc-dev:0e43056 + gpu: determinedai/pytorch-ngc-dev:f20b027 resources: slots_per_trial: 2 searcher: diff --git a/examples/hf_trainer_api/hf_language_modeling/deepspeed.yaml b/examples/hf_trainer_api/hf_language_modeling/deepspeed.yaml index cee2a137fb7..3d20684593d 100644 --- a/examples/hf_trainer_api/hf_language_modeling/deepspeed.yaml +++ b/examples/hf_trainer_api/hf_language_modeling/deepspeed.yaml @@ -6,7 +6,7 @@ environment: # You may need to modify this to match your network configuration. - NCCL_SOCKET_IFNAME=ens,eth,ib image: - gpu: determinedai/pytorch-ngc-dev:0e43056 + gpu: determinedai/pytorch-ngc-dev:f20b027 resources: slots_per_trial: 2 searcher: diff --git a/harness/determined/deploy/aws/templates/efs.yaml b/harness/determined/deploy/aws/templates/efs.yaml index e45b6fc08f4..12d095dbdc3 100644 --- a/harness/determined/deploy/aws/templates/efs.yaml +++ b/harness/determined/deploy/aws/templates/efs.yaml @@ -3,35 +3,35 @@ Mappings: RegionMap: ap-northeast-1: Master: ami-00910ef9457f0df47 - Agent: ami-0d5ce4472d6286746 + Agent: ami-0afb46883c38208a7 # TODO(DET-4258) Uncomment these when we fully support all P3 regions. # ap-northeast-2: # Master: ami-035e3e44dc41db6a2 - # Agent: ami-0090f68a647f10126 + # Agent: ami-0684df210044c21d5 # ap-southeast-1: # Master: ami-0fd1ee6c8b656f020 - # Agent: ami-048be732b76a4679e + # Agent: ami-0b3d9c646837892ad # ap-southeast-2: # Master: ami-0b62ecd3babd1c548 - # Agent: ami-05c05ba492caa1c68 + # Agent: ami-0db21a1ffe9fb8fd7 eu-central-1: Master: ami-0abbe417ed83c0b29 - Agent: ami-0ebd39eab325463c0 + Agent: ami-08ce183d559b307c8 eu-west-1: Master: ami-0e3f7dd2dc743e48a - Agent: ami-05c7e44456501d01d + Agent: ami-0cfbb4cbdf8e76b4b # eu-west-2: # Master: ami-0d78429fb6af30994 - # Agent: ami-0d3b65b9d8e18b354 + # Agent: ami-0ee850bb0d3dd97f0 us-east-1: Master: ami-0172070f66a8ebe63 - Agent: ami-0b25b64346732d0b5 + Agent: ami-039387004119ccf40 us-east-2: Master: ami-0bafa3699418551cd - Agent: ami-07517c67a90714250 + Agent: ami-07966023b5496e611 us-west-2: Master: ami-0ceeab680f529cc36 - Agent: ami-083f8147aeeba1eb2 + Agent: ami-002e87c2da1a565c9 Parameters: VpcCIDR: diff --git a/harness/determined/deploy/aws/templates/fsx.yaml b/harness/determined/deploy/aws/templates/fsx.yaml index 7a83afcf0da..8b035afbf3c 100644 --- a/harness/determined/deploy/aws/templates/fsx.yaml +++ b/harness/determined/deploy/aws/templates/fsx.yaml @@ -3,35 +3,35 @@ Mappings: RegionMap: ap-northeast-1: Master: ami-00910ef9457f0df47 - Agent: ami-0d5ce4472d6286746 + Agent: ami-0afb46883c38208a7 # TODO(DET-4258) Uncomment these when we fully support all P3 regions. # ap-northeast-2: # Master: ami-035e3e44dc41db6a2 - # Agent: ami-0090f68a647f10126 + # Agent: ami-0684df210044c21d5 # ap-southeast-1: # Master: ami-0fd1ee6c8b656f020 - # Agent: ami-048be732b76a4679e + # Agent: ami-0b3d9c646837892ad # ap-southeast-2: # Master: ami-0b62ecd3babd1c548 - # Agent: ami-05c05ba492caa1c68 + # Agent: ami-0db21a1ffe9fb8fd7 eu-central-1: Master: ami-0abbe417ed83c0b29 - Agent: ami-0ebd39eab325463c0 + Agent: ami-08ce183d559b307c8 eu-west-1: Master: ami-0e3f7dd2dc743e48a - Agent: ami-05c7e44456501d01d + Agent: ami-0cfbb4cbdf8e76b4b # eu-west-2: # Master: ami-0d78429fb6af30994 - # Agent: ami-0d3b65b9d8e18b354 + # Agent: ami-0ee850bb0d3dd97f0 us-east-1: Master: ami-0172070f66a8ebe63 - Agent: ami-0b25b64346732d0b5 + Agent: ami-039387004119ccf40 us-east-2: Master: ami-0bafa3699418551cd - Agent: ami-07517c67a90714250 + Agent: ami-07966023b5496e611 us-west-2: Master: ami-0ceeab680f529cc36 - Agent: ami-083f8147aeeba1eb2 + Agent: ami-002e87c2da1a565c9 Parameters: VpcCIDR: diff --git a/harness/determined/deploy/aws/templates/govcloud.yaml b/harness/determined/deploy/aws/templates/govcloud.yaml index 07ac9649e3a..b3ebd941eb2 100644 --- a/harness/determined/deploy/aws/templates/govcloud.yaml +++ b/harness/determined/deploy/aws/templates/govcloud.yaml @@ -5,10 +5,10 @@ Mappings: RegionMap: us-gov-east-1: Master: ami-04ef693ebcf519dc3 - Agent: ami-0cc63f942a6c1e5fd + Agent: ami-079d6ccabd0db16a2 us-gov-west-1: Master: ami-08bd15d820a3c087e - Agent: ami-0707563a683a19ed2 + Agent: ami-0752d25f0f3d4e3c2 Parameters: Keypair: Type: AWS::EC2::KeyPair::KeyName diff --git a/harness/determined/deploy/aws/templates/lore.yaml b/harness/determined/deploy/aws/templates/lore.yaml index 6284579725f..bd63e784e5f 100644 --- a/harness/determined/deploy/aws/templates/lore.yaml +++ b/harness/determined/deploy/aws/templates/lore.yaml @@ -3,35 +3,35 @@ Mappings: RegionMap: ap-northeast-1: Master: ami-00910ef9457f0df47 - Agent: ami-0d5ce4472d6286746 + Agent: ami-0afb46883c38208a7 # TODO(DET-4258) Uncomment these when we fully support all P3 regions. # ap-northeast-2: # Master: ami-035e3e44dc41db6a2 - # Agent: ami-0090f68a647f10126 + # Agent: ami-0684df210044c21d5 # ap-southeast-1: # Master: ami-0fd1ee6c8b656f020 - # Agent: ami-048be732b76a4679e + # Agent: ami-0b3d9c646837892ad # ap-southeast-2: # Master: ami-0b62ecd3babd1c548 - # Agent: ami-05c05ba492caa1c68 + # Agent: ami-0db21a1ffe9fb8fd7 eu-central-1: Master: ami-0abbe417ed83c0b29 - Agent: ami-0ebd39eab325463c0 + Agent: ami-08ce183d559b307c8 eu-west-1: Master: ami-0e3f7dd2dc743e48a - Agent: ami-05c7e44456501d01d + Agent: ami-0cfbb4cbdf8e76b4b # eu-west-2: # Master: ami-0d78429fb6af30994 - # Agent: ami-0d3b65b9d8e18b354 + # Agent: ami-0ee850bb0d3dd97f0 us-east-1: Master: ami-0172070f66a8ebe63 - Agent: ami-0b25b64346732d0b5 + Agent: ami-039387004119ccf40 us-east-2: Master: ami-0bafa3699418551cd - Agent: ami-07517c67a90714250 + Agent: ami-07966023b5496e611 us-west-2: Master: ami-0ceeab680f529cc36 - Agent: ami-083f8147aeeba1eb2 + Agent: ami-002e87c2da1a565c9 Parameters: VpcCIDR: diff --git a/harness/determined/deploy/aws/templates/secure.yaml b/harness/determined/deploy/aws/templates/secure.yaml index 72d228afa8a..5012deffe07 100644 --- a/harness/determined/deploy/aws/templates/secure.yaml +++ b/harness/determined/deploy/aws/templates/secure.yaml @@ -4,44 +4,44 @@ Mappings: RegionMap: ap-northeast-1: Master: ami-00910ef9457f0df47 - Agent: ami-0d5ce4472d6286746 + Agent: ami-0afb46883c38208a7 Bastion: ami-00910ef9457f0df47 # TODO(DET-4258) Uncomment these when we fully support all P3 regions. # ap-northeast-2: # Master: ami-035e3e44dc41db6a2 - # Agent: ami-0090f68a647f10126 + # Agent: ami-0684df210044c21d5 # Bastion: ami-035e3e44dc41db6a2 # ap-southeast-1: # Master: ami-0fd1ee6c8b656f020 - # Agent: ami-048be732b76a4679e + # Agent: ami-0b3d9c646837892ad # Bastion: ami-0fd1ee6c8b656f020 # ap-southeast-2: # Master: ami-0b62ecd3babd1c548 - # Agent: ami-05c05ba492caa1c68 + # Agent: ami-0db21a1ffe9fb8fd7 # Bastion: ami-0b62ecd3babd1c548 eu-central-1: Master: ami-0abbe417ed83c0b29 - Agent: ami-0ebd39eab325463c0 + Agent: ami-08ce183d559b307c8 Bastion: ami-0abbe417ed83c0b29 eu-west-1: Master: ami-0e3f7dd2dc743e48a - Agent: ami-05c7e44456501d01d + Agent: ami-0cfbb4cbdf8e76b4b Bastion: ami-0e3f7dd2dc743e48a # eu-west-2: # Master: ami-0d78429fb6af30994 - # Agent: ami-0d3b65b9d8e18b354 + # Agent: ami-0ee850bb0d3dd97f0 # Bastion: ami-0d78429fb6af30994 us-east-1: Master: ami-0172070f66a8ebe63 - Agent: ami-0b25b64346732d0b5 + Agent: ami-039387004119ccf40 Bastion: ami-0172070f66a8ebe63 us-east-2: Master: ami-0bafa3699418551cd - Agent: ami-07517c67a90714250 + Agent: ami-07966023b5496e611 Bastion: ami-0bafa3699418551cd us-west-2: Master: ami-0ceeab680f529cc36 - Agent: ami-083f8147aeeba1eb2 + Agent: ami-002e87c2da1a565c9 Bastion: ami-0ceeab680f529cc36 Parameters: diff --git a/harness/determined/deploy/aws/templates/simple-rds.yaml b/harness/determined/deploy/aws/templates/simple-rds.yaml index 318c59d4f95..e88f22079f1 100644 --- a/harness/determined/deploy/aws/templates/simple-rds.yaml +++ b/harness/determined/deploy/aws/templates/simple-rds.yaml @@ -5,35 +5,35 @@ Mappings: RegionMap: ap-northeast-1: Master: ami-00910ef9457f0df47 - Agent: ami-0d5ce4472d6286746 + Agent: ami-0afb46883c38208a7 # TODO(DET-4258) Uncomment these when we fully support all P3 regions. # ap-northeast-2: # Master: ami-035e3e44dc41db6a2 - # Agent: ami-0090f68a647f10126 + # Agent: ami-0684df210044c21d5 # ap-southeast-1: # Master: ami-0fd1ee6c8b656f020 - # Agent: ami-048be732b76a4679e + # Agent: ami-0b3d9c646837892ad # ap-southeast-2: # Master: ami-0b62ecd3babd1c548 - # Agent: ami-05c05ba492caa1c68 + # Agent: ami-0db21a1ffe9fb8fd7 eu-central-1: Master: ami-0abbe417ed83c0b29 - Agent: ami-0ebd39eab325463c0 + Agent: ami-08ce183d559b307c8 eu-west-1: Master: ami-0e3f7dd2dc743e48a - Agent: ami-05c7e44456501d01d + Agent: ami-0cfbb4cbdf8e76b4b # eu-west-2: # Master: ami-0d78429fb6af30994 - # Agent: ami-0d3b65b9d8e18b354 + # Agent: ami-0ee850bb0d3dd97f0 us-east-1: Master: ami-0172070f66a8ebe63 - Agent: ami-0b25b64346732d0b5 + Agent: ami-039387004119ccf40 us-east-2: Master: ami-0bafa3699418551cd - Agent: ami-07517c67a90714250 + Agent: ami-07966023b5496e611 us-west-2: Master: ami-0ceeab680f529cc36 - Agent: ami-083f8147aeeba1eb2 + Agent: ami-002e87c2da1a565c9 Parameters: Keypair: diff --git a/harness/determined/deploy/aws/templates/simple.yaml b/harness/determined/deploy/aws/templates/simple.yaml index b7b265e7230..57bab8ee38b 100644 --- a/harness/determined/deploy/aws/templates/simple.yaml +++ b/harness/determined/deploy/aws/templates/simple.yaml @@ -5,35 +5,35 @@ Mappings: RegionMap: ap-northeast-1: Master: ami-00910ef9457f0df47 - Agent: ami-0d5ce4472d6286746 + Agent: ami-0afb46883c38208a7 # TODO(DET-4258) Uncomment these when we fully support all P3 regions. # ap-northeast-2: # Master: ami-035e3e44dc41db6a2 - # Agent: ami-0090f68a647f10126 + # Agent: ami-0684df210044c21d5 # ap-southeast-1: # Master: ami-0fd1ee6c8b656f020 - # Agent: ami-048be732b76a4679e + # Agent: ami-0b3d9c646837892ad # ap-southeast-2: # Master: ami-0b62ecd3babd1c548 - # Agent: ami-05c05ba492caa1c68 + # Agent: ami-0db21a1ffe9fb8fd7 eu-central-1: Master: ami-0abbe417ed83c0b29 - Agent: ami-0ebd39eab325463c0 + Agent: ami-08ce183d559b307c8 eu-west-1: Master: ami-0e3f7dd2dc743e48a - Agent: ami-05c7e44456501d01d + Agent: ami-0cfbb4cbdf8e76b4b # eu-west-2: # Master: ami-0d78429fb6af30994 - # Agent: ami-0d3b65b9d8e18b354 + # Agent: ami-0ee850bb0d3dd97f0 us-east-1: Master: ami-0172070f66a8ebe63 - Agent: ami-0b25b64346732d0b5 + Agent: ami-039387004119ccf40 us-east-2: Master: ami-0bafa3699418551cd - Agent: ami-07517c67a90714250 + Agent: ami-07966023b5496e611 us-west-2: Master: ami-0ceeab680f529cc36 - Agent: ami-083f8147aeeba1eb2 + Agent: ami-002e87c2da1a565c9 Parameters: Keypair: diff --git a/harness/determined/deploy/gcp/constants.py b/harness/determined/deploy/gcp/constants.py index 3dbbf206269..8b866bb5dd9 100644 --- a/harness/determined/deploy/gcp/constants.py +++ b/harness/determined/deploy/gcp/constants.py @@ -4,7 +4,7 @@ class defaults: DB_PASSWORD = "postgres" BOOT_DISK_SIZE = 200 BOOT_DISK_TYPE = "pd-standard" - ENVIRONMENT_IMAGE = "det-environments-0e43056" + ENVIRONMENT_IMAGE = "det-environments-f20b027" GPU_NUM = 4 GPU_TYPE = "nvidia-tesla-t4" MASTER_INSTANCE_TYPE = "n1-standard-2" diff --git a/harness/tests/experiment/fixtures/ancient-checkpoints/0.17.6-keras/metadata.json b/harness/tests/experiment/fixtures/ancient-checkpoints/0.17.6-keras/metadata.json index c884211ef8d..9c74c4c18ca 100644 --- a/harness/tests/experiment/fixtures/ancient-checkpoints/0.17.6-keras/metadata.json +++ b/harness/tests/experiment/fixtures/ancient-checkpoints/0.17.6-keras/metadata.json @@ -39,8 +39,8 @@ }, "force_pull_image": false, "image": { - "cpu": "determinedai/tensorflow-ngc-dev:0e43056", - "cuda": "determinedai/tensorflow-ngc-dev:0e43056", + "cpu": "determinedai/tensorflow-ngc-dev:f20b027", + "cuda": "determinedai/tensorflow-ngc-dev:f20b027", "rocm": "determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-622d512" }, "pod_spec": null, diff --git a/harness/tests/experiment/fixtures/ancient-checkpoints/0.17.6-pytorch/metadata.json b/harness/tests/experiment/fixtures/ancient-checkpoints/0.17.6-pytorch/metadata.json index 2e8dd411697..3ba90d14855 100644 --- a/harness/tests/experiment/fixtures/ancient-checkpoints/0.17.6-pytorch/metadata.json +++ b/harness/tests/experiment/fixtures/ancient-checkpoints/0.17.6-pytorch/metadata.json @@ -38,8 +38,8 @@ }, "force_pull_image": false, "image": { - "cpu": "determinedai/tensorflow-ngc-dev:0e43056", - "cuda": "determinedai/tensorflow-ngc-dev:0e43056", + "cpu": "determinedai/tensorflow-ngc-dev:f20b027", + "cuda": "determinedai/tensorflow-ngc-dev:f20b027", "rocm": "determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-622d512" }, "pod_spec": null, diff --git a/harness/tests/fixtures/checkpoint.json b/harness/tests/fixtures/checkpoint.json index 40f68912411..ccee52d70a3 100644 --- a/harness/tests/fixtures/checkpoint.json +++ b/harness/tests/fixtures/checkpoint.json @@ -69,8 +69,8 @@ }, "force_pull_image":false, "image":{ - "cpu":"determinedai/pytorch-ngc-dev:0e43056", - "cuda":"determinedai/pytorch-ngc-dev:0e43056", + "cpu":"determinedai/pytorch-ngc-dev:f20b027", + "cuda":"determinedai/pytorch-ngc-dev:f20b027", "rocm":"determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-622d512" }, "pod_spec":null, diff --git a/helm/charts/determined/values.yaml b/helm/charts/determined/values.yaml index 7ce68b82af5..a9c20425e0b 100644 --- a/helm/charts/determined/values.yaml +++ b/helm/charts/determined/values.yaml @@ -31,8 +31,8 @@ defaultImages: kubeSchedulerPreemption: "determinedai/kube-scheduler:0.17.0" # default images for CPU and GPU environments - cpuImage: "determinedai/pytorch-ngc-dev:0e43056" - gpuImage: "determinedai/pytorch-ngc-dev:0e43056" + cpuImage: "determinedai/pytorch-ngc-dev:f20b027" + gpuImage: "determinedai/pytorch-ngc-dev:f20b027" # Install Determined enterprise edition. enterpriseEdition: false diff --git a/master/internal/config/provconfig/aws_config.go b/master/internal/config/provconfig/aws_config.go index b462724f7c0..feafe9bce9f 100644 --- a/master/internal/config/provconfig/aws_config.go +++ b/master/internal/config/provconfig/aws_config.go @@ -50,16 +50,16 @@ type AWSClusterConfig struct { } var defaultAWSImageID = map[string]string{ - "ap-northeast-1": "ami-0d5ce4472d6286746", - "ap-northeast-2": "ami-0090f68a647f10126", - "ap-southeast-1": "ami-048be732b76a4679e", - "ap-southeast-2": "ami-05c05ba492caa1c68", - "us-east-2": "ami-07517c67a90714250", - "us-east-1": "ami-0b25b64346732d0b5", - "us-west-2": "ami-083f8147aeeba1eb2", - "eu-central-1": "ami-0ebd39eab325463c0", - "eu-west-2": "ami-0d3b65b9d8e18b354", - "eu-west-1": "ami-05c7e44456501d01d", + "ap-northeast-1": "ami-0afb46883c38208a7", + "ap-northeast-2": "ami-0684df210044c21d5", + "ap-southeast-1": "ami-0b3d9c646837892ad", + "ap-southeast-2": "ami-0db21a1ffe9fb8fd7", + "us-east-2": "ami-07966023b5496e611", + "us-east-1": "ami-039387004119ccf40", + "us-west-2": "ami-002e87c2da1a565c9", + "eu-central-1": "ami-08ce183d559b307c8", + "eu-west-2": "ami-0ee850bb0d3dd97f0", + "eu-west-1": "ami-0cfbb4cbdf8e76b4b", } var defaultAWSClusterConfig = AWSClusterConfig{ diff --git a/master/internal/config/provconfig/gcp_config.go b/master/internal/config/provconfig/gcp_config.go index 5f6db4b2712..1fd4ac3fbaf 100644 --- a/master/internal/config/provconfig/gcp_config.go +++ b/master/internal/config/provconfig/gcp_config.go @@ -56,7 +56,7 @@ type GCPClusterConfig struct { func DefaultGCPClusterConfig() *GCPClusterConfig { return &GCPClusterConfig{ BootDiskSize: 200, - BootDiskSourceImage: "projects/determined-ai/global/images/det-environments-0e43056", + BootDiskSourceImage: "projects/determined-ai/global/images/det-environments-f20b027", LabelKey: "managed-by", InstanceType: gceInstanceType{ MachineType: "n1-standard-32", diff --git a/master/pkg/schemas/expconf/const.go b/master/pkg/schemas/expconf/const.go index 5d210527b43..ffb8ccf786d 100644 --- a/master/pkg/schemas/expconf/const.go +++ b/master/pkg/schemas/expconf/const.go @@ -8,7 +8,7 @@ const ( // Default task environment docker image names. const ( - CPUImage = "determinedai/pytorch-ngc-dev:0e43056" - CUDAImage = "determinedai/pytorch-ngc-dev:0e43056" + CPUImage = "determinedai/pytorch-ngc-dev:f20b027" + CUDAImage = "determinedai/pytorch-ngc-dev:f20b027" ROCMImage = "determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-622d512" ) diff --git a/model_hub/Makefile b/model_hub/Makefile index 4a543ca5ba8..d1c434eecfc 100644 --- a/model_hub/Makefile +++ b/model_hub/Makefile @@ -5,7 +5,7 @@ SHORT_GIT_HASH := $(shell git rev-parse --short HEAD) ARTIFACTS_DIR := /tmp/artifacts # Model-hub library environments will be built on top of the default GPU and CPU images in master/pkg/model/defaults.go -DEFAULT_GPU_IMAGE := determinedai/pytorch-tensorflow-cuda-dev:0e43056 +DEFAULT_GPU_IMAGE := determinedai/pytorch-tensorflow-cuda-dev:f20b027 ############REMINDER############ # When bumping third-party library versions, remember to bump versions in diff --git a/schemas/test_cases/v0/experiment.yaml b/schemas/test_cases/v0/experiment.yaml index 9491c026c58..704a53f6fa4 100644 --- a/schemas/test_cases/v0/experiment.yaml +++ b/schemas/test_cases/v0/experiment.yaml @@ -47,8 +47,8 @@ environment_variables: {} force_pull_image: false image: - cpu: determinedai/pytorch-ngc-dev:0e43056 - cuda: determinedai/pytorch-ngc-dev:0e43056 + cpu: determinedai/pytorch-ngc-dev:f20b027 + cuda: determinedai/pytorch-ngc-dev:f20b027 rocm: determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-622d512 pod_spec: null ports: diff --git a/tools/scripts/bumpenvs.yaml b/tools/scripts/bumpenvs.yaml index 5ba0a89772c..a35c3e6d3a0 100644 --- a/tools/scripts/bumpenvs.yaml +++ b/tools/scripts/bumpenvs.yaml @@ -1,20 +1,20 @@ -ap_northeast_1_agent_ami: {new: ami-0d5ce4472d6286746, old: ami-0b9655c3686ad290f} +ap_northeast_1_agent_ami: {new: ami-0afb46883c38208a7, old: ami-0d5ce4472d6286746} ap_northeast_1_bastion_ami: {new: ami-00910ef9457f0df47, old: ami-0c7cb70d3eb61492b} ap_northeast_1_master_ami: {new: ami-00910ef9457f0df47, old: ami-0c7cb70d3eb61492b} -ap_northeast_2_agent_ami: {new: ami-0090f68a647f10126, old: ami-0dc620552c1aaa2cf} +ap_northeast_2_agent_ami: {new: ami-0684df210044c21d5, old: ami-0090f68a647f10126} ap_northeast_2_bastion_ami: {new: ami-035e3e44dc41db6a2, old: ami-003bb1772f36a39a3} ap_northeast_2_master_ami: {new: ami-035e3e44dc41db6a2, old: ami-003bb1772f36a39a3} -ap_southeast_1_agent_ami: {new: ami-048be732b76a4679e, old: ami-0adbb66c690fafe37} +ap_southeast_1_agent_ami: {new: ami-0b3d9c646837892ad, old: ami-048be732b76a4679e} ap_southeast_1_bastion_ami: {new: ami-0fd1ee6c8b656f020, old: ami-09f03fa5572692399} ap_southeast_1_master_ami: {new: ami-0fd1ee6c8b656f020, old: ami-09f03fa5572692399} -ap_southeast_2_agent_ami: {new: ami-05c05ba492caa1c68, old: ami-0a453138b8d55c36d} +ap_southeast_2_agent_ami: {new: ami-0db21a1ffe9fb8fd7, old: ami-05c05ba492caa1c68} ap_southeast_2_bastion_ami: {new: ami-0b62ecd3babd1c548, old: ami-06139e5e22cc2f7b1} ap_southeast_2_master_ami: {new: ami-0b62ecd3babd1c548, old: ami-06139e5e22cc2f7b1} deepspeed_0_hashed: {new: determinedai/environments:cuda-11.3-pytorch-1.10-deepspeed-0.8.3-gpu-748dda4, old: determinedai/environments:cuda-11.3-pytorch-1.10-deepspeed-0.8.3-gpu-079eb6d} deepspeed_0_versioned: {new: determinedai/environments:cuda-11.3-pytorch-1.10-deepspeed-0.8.3-gpu-0.31.1, old: determinedai/environments:cuda-11.3-pytorch-1.10-deepspeed-0.8.3-gpu-0.30.1} -deepspeed_gpt_neox_0_hashed: {new: determinedai/deepspeed-cuda-gpt-neox:0e43056, old: determinedai/deepspeed-cuda-gpt-neox:8c90e80} +deepspeed_gpt_neox_0_hashed: {new: determinedai/deepspeed-cuda-gpt-neox:f20b027, old: determinedai/deepspeed-cuda-gpt-neox:0e43056} deepspeed_gpu_0_hashed: {new: determinedai/environments:cuda-11.3-pytorch-1.10-deepspeed-0.8.3-gpu-2196775, old: determinedai/environments:cuda-11.3-pytorch-1.10-deepspeed-0.8.3-gpu-f66cbce} deepspeed_gpu_0_versioned: {new: determinedai/environments:cuda-11.3-pytorch-1.10-deepspeed-0.8.3-gpu-0.29.1, @@ -23,16 +23,16 @@ deepspeed_gpu_1_hashed: {new: determinedai/environments:cuda-11.3-pytorch-1.10-tf-2.8-deepspeed-0.7.0-gpu-mpi-9119094} deepspeed_gpu_1_versioned: {new: determinedai/environments:cuda-11.3-pytorch-1.10-tf-2.8-deepspeed-0.7.0-gpu-mpi-0.19.1} -eu_central_1_agent_ami: {new: ami-0ebd39eab325463c0, old: ami-09284ff11565b3ae3} +eu_central_1_agent_ami: {new: ami-08ce183d559b307c8, old: ami-0ebd39eab325463c0} eu_central_1_bastion_ami: {new: ami-0abbe417ed83c0b29, old: ami-0b81e95bb0a06ea8c} eu_central_1_master_ami: {new: ami-0abbe417ed83c0b29, old: ami-0b81e95bb0a06ea8c} -eu_west_1_agent_ami: {new: ami-05c7e44456501d01d, old: ami-037b373c0075ea120} +eu_west_1_agent_ami: {new: ami-0cfbb4cbdf8e76b4b, old: ami-05c7e44456501d01d} eu_west_1_bastion_ami: {new: ami-0e3f7dd2dc743e48a, old: ami-029cfca952b331b52} eu_west_1_master_ami: {new: ami-0e3f7dd2dc743e48a, old: ami-029cfca952b331b52} -eu_west_2_agent_ami: {new: ami-0d3b65b9d8e18b354, old: ami-0a523360a75ece477} +eu_west_2_agent_ami: {new: ami-0ee850bb0d3dd97f0, old: ami-0d3b65b9d8e18b354} eu_west_2_bastion_ami: {new: ami-0d78429fb6af30994, old: ami-035469b606478d63d} eu_west_2_master_ami: {new: ami-0d78429fb6af30994, old: ami-035469b606478d63d} -gcp_env: {new: det-environments-0e43056, old: det-environments-8c90e80} +gcp_env: {new: det-environments-f20b027, old: det-environments-0e43056} gpt_neox_deepspeed_0_hashed: {new: determinedai/environments:cuda-11.3-pytorch-1.10-gpt-neox-deepspeed-gpu-748dda4, old: determinedai/environments:cuda-11.3-pytorch-1.10-gpt-neox-deepspeed-gpu-079eb6d} gpt_neox_deepspeed_0_versioned: {new: determinedai/environments:cuda-11.3-pytorch-1.10-gpt-neox-deepspeed-gpu-0.31.1, @@ -81,8 +81,8 @@ pytorch10_tf27_rocm50_0_hashed: {new: determinedai/environments:rocm-5.0-pytorch old: determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-bf9480b} pytorch10_tf27_rocm50_0_versioned: {new: determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-0.26.4, old: determinedai/environments-dev:rocm-5.0-pytorch-1.10-tf-2.7-rocm-0.26.4} -pytorch13_tf210_rocm56_0_hashed: {new: determinedai/environments:rocm-5.6-pytorch-1.3-tf-2.10-rocm-ompi-0e43056, - old: determinedai/environments:rocm-5.6-pytorch-1.3-tf-2.10-rocm-ompi-8c90e80} +pytorch13_tf210_rocm56_0_hashed: {new: determinedai/environments:rocm-5.6-pytorch-1.3-tf-2.10-rocm-ompi-f20b027, + old: determinedai/environments:rocm-5.6-pytorch-1.3-tf-2.10-rocm-ompi-0e43056} pytorch13_tf210_rocm56_0_versioned: {new: determinedai/environments:rocm-5.6-pytorch-1.3-tf-2.10-rocm-ompi-0.33.1, old: determinedai/environments:rocm-5.6-pytorch-1.3-tf-2.10-rocm-ompi-0.31.2} pytorch13_tf210_rocm56_1_hashed: {new: determinedai/environments:rocm-5.6-pytorch-1.3-tf-2.10-rocm-ompi-2196775, @@ -95,29 +95,29 @@ pytorch19_tf25_rocm_0_versioned: {new: determinedai/environments:rocm-5.0-pytorc old: determinedai/environments:rocm-4.2-pytorch-1.9-tf-2.5-rocm-0.18.5} pytorch19_tf25_rocm_1_hashed: {new: determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-096d730} pytorch19_tf25_rocm_1_versioned: {new: determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-0.19.4} -pytorch20_tf210_rocm56_0_hashed: {new: determinedai/environments:rocm-5.6-pytorch-2.0-tf-2.10-rocm-ompi-0e43056, - old: determinedai/environments:rocm-5.6-pytorch-2.0-tf-2.10-rocm-ompi-8c90e80} +pytorch20_tf210_rocm56_0_hashed: {new: determinedai/environments:rocm-5.6-pytorch-2.0-tf-2.10-rocm-ompi-f20b027, + old: determinedai/environments:rocm-5.6-pytorch-2.0-tf-2.10-rocm-ompi-0e43056} pytorch20_tf210_rocm56_0_versioned: {new: determinedai/environments:rocm-5.6-pytorch-2.0-tf-2.10-rocm-ompi-0.33.1, old: determinedai/environments:rocm-5.6-pytorch-2.0-tf-2.10-rocm-ompi-0.31.2} pytorch20_tf210_rocm56_1_hashed: {new: determinedai/environments:rocm-5.6-pytorch-2.0-tf-2.10-rocm-ompi-2196775, old: determinedai/environments:rocm-5.6-pytorch-2.0-tf-2.10-rocm-ompi-f66cbce} pytorch20_tf210_rocm56_1_versioned: {new: determinedai/environments:rocm-5.6-pytorch-2.0-tf-2.10-rocm-ompi-0.29.1, old: determinedai/environments:rocm-5.6-pytorch-2.0-tf-2.10-rocm-ompi-0.27.1} -pytorch_cpu_0_hashed: {new: determinedai/pytorch-cpu-dev:0e43056, old: determinedai/pytorch-cpu-dev:8c90e80} -pytorch_cpu_1_hashed: {new: determinedai/pytorch-cpu-hpc-dev:0e43056, old: determinedai/pytorch-cpu-hpc-dev:8c90e80} -pytorch_cuda_0_hashed: {new: determinedai/pytorch-cuda-dev:0e43056, old: determinedai/pytorch-cuda-dev:8c90e80} -pytorch_cuda_1_hashed: {new: determinedai/pytorch-cuda-hpc-dev:0e43056, old: determinedai/pytorch-cuda-hpc-dev:8c90e80} -pytorch_ngc_hashed: {new: determinedai/pytorch-ngc-dev:0e43056, old: determinedai/pytorch-ngc-dev:8c90e80} -pytorch_ngc_hpc_hashed: {new: determinedai/pytorch-ngc-hpc-dev:0e43056, old: determinedai/pytorch-ngc-hpc-dev:8c90e80} -tensorflow_cpu_0_hashed: {new: determinedai/pytorch-tensorflow-cpu-dev:0e43056, old: determinedai/pytorch-tensorflow-cpu-dev:8c90e80} -tensorflow_cpu_1_hashed: {new: determinedai/pytorch-tensorflow-cpu-hpc-dev:0e43056, - old: determinedai/pytorch-tensorflow-cpu-hpc-dev:8c90e80} -tensorflow_cuda_0_hashed: {new: determinedai/pytorch-tensorflow-cuda-dev:0e43056, - old: determinedai/pytorch-tensorflow-cuda-dev:8c90e80} -tensorflow_cuda_1_hashed: {new: determinedai/pytorch-tensorflow-cuda-hpc-dev:0e43056, - old: determinedai/pytorch-tensorflow-cuda-hpc-dev:8c90e80} -tensorflow_ngc_hashed: {new: determinedai/tensorflow-ngc-dev:0e43056, old: determinedai/tensorflow-ngc-dev:8c90e80} -tensorflow_ngc_hpc_hashed: {new: determinedai/tensorflow-ngc-hpc-dev:0e43056, old: determinedai/tensorflow-ngc-hpc-dev:8c90e80} +pytorch_cpu_0_hashed: {new: determinedai/pytorch-cpu-dev:f20b027, old: determinedai/pytorch-cpu-dev:0e43056} +pytorch_cpu_1_hashed: {new: determinedai/pytorch-cpu-hpc-dev:f20b027, old: determinedai/pytorch-cpu-hpc-dev:0e43056} +pytorch_cuda_0_hashed: {new: determinedai/pytorch-cuda-dev:f20b027, old: determinedai/pytorch-cuda-dev:0e43056} +pytorch_cuda_1_hashed: {new: determinedai/pytorch-cuda-hpc-dev:f20b027, old: determinedai/pytorch-cuda-hpc-dev:0e43056} +pytorch_ngc_hashed: {new: determinedai/pytorch-ngc-dev:f20b027, old: determinedai/pytorch-ngc-dev:0e43056} +pytorch_ngc_hpc_hashed: {new: determinedai/pytorch-ngc-hpc-dev:f20b027, old: determinedai/pytorch-ngc-hpc-dev:0e43056} +tensorflow_cpu_0_hashed: {new: determinedai/pytorch-tensorflow-cpu-dev:f20b027, old: determinedai/pytorch-tensorflow-cpu-dev:0e43056} +tensorflow_cpu_1_hashed: {new: determinedai/pytorch-tensorflow-cpu-hpc-dev:f20b027, + old: determinedai/pytorch-tensorflow-cpu-hpc-dev:0e43056} +tensorflow_cuda_0_hashed: {new: determinedai/pytorch-tensorflow-cuda-dev:f20b027, + old: determinedai/pytorch-tensorflow-cuda-dev:0e43056} +tensorflow_cuda_1_hashed: {new: determinedai/pytorch-tensorflow-cuda-hpc-dev:f20b027, + old: determinedai/pytorch-tensorflow-cuda-hpc-dev:0e43056} +tensorflow_ngc_hashed: {new: determinedai/tensorflow-ngc-dev:f20b027, old: determinedai/tensorflow-ngc-dev:0e43056} +tensorflow_ngc_hpc_hashed: {new: determinedai/tensorflow-ngc-hpc-dev:f20b027, old: determinedai/tensorflow-ngc-hpc-dev:0e43056} tf24_cpu_0_hashed: {new: determinedai/environments:py-3.8-pytorch-1.9-tf-2.4-cpu-24586f0, old: determinedai/environments-dev:py-3.8-pytorch-1.9-tf-2.4-cpu-1c769fb} tf24_cpu_0_versioned: {new: determinedai/environments:py-3.8-pytorch-1.9-tf-2.4-cpu-0.19.10, @@ -208,16 +208,16 @@ tf2_gpu_1_hashed: {new: determinedai/environments:cuda-11.3-pytorch-1.12-tf-2.11 old: determinedai/environments:cuda-11.3-pytorch-1.12-tf-2.11-gpu-mpi-079eb6d} tf2_gpu_1_versioned: {new: determinedai/environments:cuda-11.3-pytorch-1.12-tf-2.11-gpu-mpi-ofi-0.31.1, old: determinedai/environments:cuda-11.3-pytorch-1.12-tf-2.11-gpu-mpi-0.30.1} -us_east_1_agent_ami: {new: ami-0b25b64346732d0b5, old: ami-0f8ed0567336433e2} +us_east_1_agent_ami: {new: ami-039387004119ccf40, old: ami-0b25b64346732d0b5} us_east_1_bastion_ami: {new: ami-0172070f66a8ebe63, old: ami-0b93ce03dcbcb10f6} us_east_1_master_ami: {new: ami-0172070f66a8ebe63, old: ami-0b93ce03dcbcb10f6} -us_east_2_agent_ami: {new: ami-07517c67a90714250, old: ami-09578b4c5ea532f24} +us_east_2_agent_ami: {new: ami-07966023b5496e611, old: ami-07517c67a90714250} us_east_2_bastion_ami: {new: ami-0bafa3699418551cd, old: ami-0cbea92f2377277a4} us_east_2_master_ami: {new: ami-0bafa3699418551cd, old: ami-0cbea92f2377277a4} -us_gov_east_1_agent_ami: {new: ami-0cc63f942a6c1e5fd, old: ami-09a56ff763262c365} +us_gov_east_1_agent_ami: {new: ami-079d6ccabd0db16a2, old: ami-0cc63f942a6c1e5fd} us_gov_east_1_master_ami: {new: ami-04ef693ebcf519dc3, old: ami-01d71f6009765d511} -us_gov_west_1_agent_ami: {new: ami-0707563a683a19ed2, old: ami-0e7d4dc4ebb742216} +us_gov_west_1_agent_ami: {new: ami-0752d25f0f3d4e3c2, old: ami-0707563a683a19ed2} us_gov_west_1_master_ami: {new: ami-08bd15d820a3c087e, old: ami-0b64b04df085adbf1} -us_west_2_agent_ami: {new: ami-083f8147aeeba1eb2, old: ami-0c8ad935d75f2f73d} +us_west_2_agent_ami: {new: ami-002e87c2da1a565c9, old: ami-083f8147aeeba1eb2} us_west_2_bastion_ami: {new: ami-0ceeab680f529cc36, old: ami-0d31d7c9fc9503726} us_west_2_master_ami: {new: ami-0ceeab680f529cc36, old: ami-0d31d7c9fc9503726} diff --git a/tools/scripts/environments-target.txt b/tools/scripts/environments-target.txt index ae1aefd0383..77bfae508c0 100644 --- a/tools/scripts/environments-target.txt +++ b/tools/scripts/environments-target.txt @@ -1 +1 @@ -0e43056 +f20b027 diff --git a/webui/react/src/fixtures/responses/experiment-details/non-scalar-metrics-4078.json b/webui/react/src/fixtures/responses/experiment-details/non-scalar-metrics-4078.json index 0ef8ad15fbd..0cc967d34f1 100644 --- a/webui/react/src/fixtures/responses/experiment-details/non-scalar-metrics-4078.json +++ b/webui/react/src/fixtures/responses/experiment-details/non-scalar-metrics-4078.json @@ -32,8 +32,8 @@ "name": "Fork of Fork of mnist_tp_to_estimator_const", "environment": { "image": { - "cpu": "determinedai/pytorch-ngc-dev:0e43056", - "gpu": "determinedai/pytorch-ngc-dev:0e43056" + "cpu": "determinedai/pytorch-ngc-dev:f20b027", + "gpu": "determinedai/pytorch-ngc-dev:f20b027" }, "ports": null, "pod_spec": null, diff --git a/webui/react/src/fixtures/responses/experiment-details/set-a.json b/webui/react/src/fixtures/responses/experiment-details/set-a.json index 86eda3d0c10..5f9ab32fc5d 100644 --- a/webui/react/src/fixtures/responses/experiment-details/set-a.json +++ b/webui/react/src/fixtures/responses/experiment-details/set-a.json @@ -694,8 +694,8 @@ "environment_variables": {}, "force_pull_image": false, "image": { - "cpu": "determinedai/pytorch-ngc-dev:0e43056", - "gpu": "determinedai/pytorch-ngc-dev:0e43056" + "cpu": "determinedai/pytorch-ngc-dev:f20b027", + "gpu": "determinedai/pytorch-ngc-dev:f20b027" }, "pod_spec": null, "ports": null @@ -838,8 +838,8 @@ "environment_variables": {}, "force_pull_image": false, "image": { - "cpu": "determinedai/tensorflow-ngc-dev:0e43056", - "gpu": "determinedai/tensorflow-ngc-dev:0e43056" + "cpu": "determinedai/tensorflow-ngc-dev:f20b027", + "gpu": "determinedai/tensorflow-ngc-dev:f20b027" }, "pod_spec": { "metadata": { diff --git a/webui/react/src/fixtures/responses/trial-details/old-trial-config-noop-adaptive.json b/webui/react/src/fixtures/responses/trial-details/old-trial-config-noop-adaptive.json index ab343b7b21d..f14934b0b15 100644 --- a/webui/react/src/fixtures/responses/trial-details/old-trial-config-noop-adaptive.json +++ b/webui/react/src/fixtures/responses/trial-details/old-trial-config-noop-adaptive.json @@ -30,8 +30,8 @@ "name": "noop_adaptive", "environment": { "image": { - "cpu": "determinedai/pytorch-ngc-dev:0e43056", - "gpu": "determinedai/pytorch-ngc-dev:0e43056" + "cpu": "determinedai/pytorch-ngc-dev:f20b027", + "gpu": "determinedai/pytorch-ngc-dev:f20b027" }, "ports": null, "force_pull_image": false,