From 3cc3ab64246af6b5bc90fbc525e90762c10ce11b Mon Sep 17 00:00:00 2001
From: Daniel King <daniel@mosaicml.com>
Date: Thu, 15 Feb 2024 17:27:38 -0800
Subject: [PATCH 1/2] Bump to PyTorch 2.2 and flash2 images; drop 2.1 CI jobs

---
 .github/workflows/pr-cpu.yaml         |  4 ----
 .github/workflows/pr-gpu.yaml         | 10 ----------
 README.md                             | 12 ++++++------
 mcli/mcli-1b-max-seq-len-8k.yaml      |  4 ++--
 mcli/mcli-1b.yaml                     |  4 ++--
 mcli/mcli-benchmark-mpt.yaml          |  4 ++--
 mcli/mcli-convert-composer-to-hf.yaml |  2 +-
 mcli/mcli-hf-eval.yaml                |  4 ++--
 mcli/mcli-hf-generate.yaml            |  4 ++--
 mcli/mcli-llama2-finetune.yaml        |  4 ++--
 mcli/mcli-openai-eval.yaml            |  4 ++--
 mcli/mcli-pretokenize-oci-upload.yaml |  2 +-
 setup.py                              |  2 +-
 13 files changed, 23 insertions(+), 37 deletions(-)

diff --git a/.github/workflows/pr-cpu.yaml b/.github/workflows/pr-cpu.yaml
index 8ebf7de053..0bba0fadb9 100644
--- a/.github/workflows/pr-cpu.yaml
+++ b/.github/workflows/pr-cpu.yaml
@@ -19,10 +19,6 @@ jobs:
     strategy:
       matrix:
         include:
-        - name: "cpu-2.1.0"
-          container: mosaicml/pytorch:2.1.0_cpu-python3.10-ubuntu20.04
-          markers: "not gpu"
-          pytest_command: "coverage run -m pytest"
         - name: "cpu-2.2.0"
           container: mosaicml/pytorch:2.2.0_cpu-python3.11-ubuntu20.04
           markers: "not gpu"
diff --git a/.github/workflows/pr-gpu.yaml b/.github/workflows/pr-gpu.yaml
index f7dbd16b2c..05ba590342 100644
--- a/.github/workflows/pr-gpu.yaml
+++ b/.github/workflows/pr-gpu.yaml
@@ -19,16 +19,6 @@ jobs:
     strategy:
       matrix:
         include:
-        - name: "gpu-2.1.0"
-          container: mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04
-          markers: "gpu"
-          pytest_command: "coverage run -m pytest"
-          deps_group: "all"
-        - name: "gpu-2.1.0-flash2"
-          container: mosaicml/llm-foundry:2.1.0_cu121_flash2-latest
-          markers: "gpu"
-          pytest_command: "coverage run -m pytest"
-          deps_group: "all-flash2"
         - name: "gpu-2.2.0"
           container: mosaicml/pytorch:2.2.0_cu121-python3.11-ubuntu20.04
           markers: "gpu"
diff --git a/README.md b/README.md
index 6668476fd4..94a95daa77 100644
--- a/README.md
+++ b/README.md
@@ -92,14 +92,14 @@ Something missing? Contribute with a PR!
 
 
 # Hardware and Software Requirements
-This codebase has been tested with PyTorch 2.1 with NVIDIA A100s and H100s.
+This codebase has been tested with PyTorch 2.2 with NVIDIA A100s and H100s.
 This codebase may also work on systems with other devices, such as consumer NVIDIA cards and AMD cards, but we are not actively testing these systems.
 If you have success/failure using LLM Foundry on other systems, please let us know in a Github issue and we will update the support matrix!
 
 | Device         | Torch Version | Cuda Version | Status                       |
 | -------------- | ------------- | ------------ | ---------------------------- |
-| A100-40GB/80GB | 2.1.0         | 12.1         | :white_check_mark: Supported |
-| H100-80GB      | 2.1.0         | 12.1         | :white_check_mark: Supported |
+| A100-40GB/80GB | 2.2.0         | 12.1         | :white_check_mark: Supported |
+| H100-80GB      | 2.2.0         | 12.1         | :white_check_mark: Supported |
 
 ## MosaicML Docker Images
 We highly recommend using our prebuilt Docker images. You can find them here: https://hub.docker.com/orgs/mosaicml/repositories.
@@ -113,9 +113,9 @@ You can select a specific commit hash such as `mosaicml/llm-foundry:1.13.1_cu117
 
 | Docker Image                                           | Torch Version | Cuda Version      | LLM Foundry dependencies installed? |
 | ------------------------------------------------------ | ------------- | ----------------- | ----------------------------------- |
-| `mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04`  | 2.1.2         | 12.1 (Infiniband) | No                                  |
-| `mosaicml/llm-foundry:2.1.2_cu121_flash2-latest`       | 2.1.2         | 12.1 (Infiniband) | Yes                                 |
-| `mosaicml/llm-foundry:2.1.2_cu121_flash2_aws-latest`   | 2.1.2         | 12.1 (EFA)        | Yes                                 |
+| `mosaicml/pytorch:2.2.0_cu121-python3.11-ubuntu20.04`  | 2.2.0         | 12.1 (Infiniband) | No                                  |
+| `mosaicml/llm-foundry:2.2.0_cu121_flash2-latest`       | 2.2.0         | 12.1 (Infiniband) | Yes                                 |
+| `mosaicml/llm-foundry:2.2.0_cu121_flash2_aws-latest`   | 2.2.0         | 12.1 (EFA)        | Yes                                 |
 
 
 # Installation
diff --git a/mcli/mcli-1b-max-seq-len-8k.yaml b/mcli/mcli-1b-max-seq-len-8k.yaml
index 5c6b38be6d..3963b4a8d4 100644
--- a/mcli/mcli-1b-max-seq-len-8k.yaml
+++ b/mcli/mcli-1b-max-seq-len-8k.yaml
@@ -3,7 +3,7 @@ integrations:
   git_repo: mosaicml/llm-foundry
   git_branch: v0.5.0
   # git_commit:  # OR use your commit hash
-  pip_install: -e .[gpu]
+  pip_install: -e .[gpu-flash2]
   ssh_clone: false  # Should be true if using a private repo
 
 # We are fetching, converting, and training on the 'val' split
@@ -17,7 +17,7 @@ command: |
     --out_root ./my-copy-c4 --splits train_small val_small \
     --concat_tokens 8192 --tokenizer EleutherAI/gpt-neox-20b --eos_text '<|endoftext|>'
   composer train/train.py /mnt/config/parameters.yaml
-image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
+image: mosaicml/llm-foundry:2.2.0_cu121_flash2-latest
 name: mpt-1b-ctx-8k-gpus-8
 
 compute:
diff --git a/mcli/mcli-1b.yaml b/mcli/mcli-1b.yaml
index f8e782ad84..96965ddcb8 100644
--- a/mcli/mcli-1b.yaml
+++ b/mcli/mcli-1b.yaml
@@ -3,7 +3,7 @@ integrations:
   git_repo: mosaicml/llm-foundry
   git_branch: v0.5.0
   # git_commit:  # OR use your commit hash
-  pip_install: -e .[gpu]
+  pip_install: -e .[gpu-flash2]
   ssh_clone: false  # Should be true if using a private repo
 
 # We are fetching, converting, and training on the 'val' split
@@ -21,7 +21,7 @@ command: |
     eval_loader.dataset.split=val_small \
     max_duration=100ba \
     eval_interval=0
-image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
+image: mosaicml/llm-foundry:2.2.0_cu121_flash2-latest
 name: mpt-1b-gpus-8
 
 compute:
diff --git a/mcli/mcli-benchmark-mpt.yaml b/mcli/mcli-benchmark-mpt.yaml
index ae799abb4c..104157cf27 100644
--- a/mcli/mcli-benchmark-mpt.yaml
+++ b/mcli/mcli-benchmark-mpt.yaml
@@ -6,14 +6,14 @@ compute:
   # cluster: TODO # Name of the cluster to use for this run
   # gpu_type: a100_80gb # Type of GPU to use. We use a100_80gb in our experiments
 
-image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
+image: mosaicml/llm-foundry:2.2.0_cu121_flash2-latest
 
 integrations:
 - integration_type: git_repo
   git_repo: mosaicml/llm-foundry
   git_branch: v0.5.0
   # git_commit: # OR use your commit hash
-  pip_install: ".[gpu]"
+  pip_install: ".[gpu-flash2]"
 
 command: |
   cd llm-foundry/scripts/inference/benchmarking
diff --git a/mcli/mcli-convert-composer-to-hf.yaml b/mcli/mcli-convert-composer-to-hf.yaml
index 5904cad522..d62648c042 100644
--- a/mcli/mcli-convert-composer-to-hf.yaml
+++ b/mcli/mcli-convert-composer-to-hf.yaml
@@ -13,7 +13,7 @@ command: |
     --hf_output_path s3://bucket/folder/hf/ \
     --output_precision bf16 \
 
-image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
+image: mosaicml/llm-foundry:2.2.0_cu121_flash2-latest
 name: convert-composer-hf
 
 compute:
diff --git a/mcli/mcli-hf-eval.yaml b/mcli/mcli-hf-eval.yaml
index 92faf8233b..0249cd2495 100644
--- a/mcli/mcli-hf-eval.yaml
+++ b/mcli/mcli-hf-eval.yaml
@@ -3,7 +3,7 @@ integrations:
   git_repo: mosaicml/llm-foundry
   git_branch: v0.5.0
   # git_commit:  # OR use your commit hash
-  pip_install: -e ".[gpu]"
+  pip_install: -e ".[gpu-flash2]"
   ssh_clone: false  # Should be true if using a private repo
 
 command: |
@@ -16,7 +16,7 @@ gpu_num: 8
 # gpu_type:
 # cluster:  # replace with your cluster here!
 
-image: mosaicml/llm-foundry:2.1.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.2.0_cu121_flash2-latest
 
 # The below is injected as a YAML file: /mnt/config/parameters.yaml
 parameters:
diff --git a/mcli/mcli-hf-generate.yaml b/mcli/mcli-hf-generate.yaml
index 6f38db07b3..b155d362fb 100644
--- a/mcli/mcli-hf-generate.yaml
+++ b/mcli/mcli-hf-generate.yaml
@@ -3,7 +3,7 @@ integrations:
   git_repo: mosaicml/llm-foundry
   git_branch: v0.5.0
   # git_commit: # OR use your commit hash
-  pip_install: -e .[gpu]
+  pip_install: -e .[gpu-flash2]
   ssh_clone: false  # Should be true if using a private repo
 
 command: |
@@ -35,7 +35,7 @@ command: |
       "Here's a quick recipe for baking chocolate chip cookies: Start by" \
       "The best 5 cities to visit in Europe are"
 
-image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
+image: mosaicml/llm-foundry:2.2.0_cu121_flash2-latest
 name: hf-generate
 
 compute:
diff --git a/mcli/mcli-llama2-finetune.yaml b/mcli/mcli-llama2-finetune.yaml
index 110c92889a..763a4f8355 100644
--- a/mcli/mcli-llama2-finetune.yaml
+++ b/mcli/mcli-llama2-finetune.yaml
@@ -3,13 +3,13 @@ integrations:
   git_repo: mosaicml/llm-foundry
   git_branch: v0.5.0
   # git_commit: # OR use your commit hash
-  pip_install: -e .[gpu]
+  pip_install: -e .[gpu-flash2]
   ssh_clone: false  # Should be true if using a private repo
 
 command: |
   cd llm-foundry/scripts
   composer train/train.py /mnt/config/parameters.yaml
-image: mosaicml/llm-foundry:1.13.1_cu117-latest
+image: mosaicml/llm-foundry:2.2.0_cu121_flash2-latest
 name: llama2-finetune
 
 compute:
diff --git a/mcli/mcli-openai-eval.yaml b/mcli/mcli-openai-eval.yaml
index 5461c59d49..b2536c4a1a 100644
--- a/mcli/mcli-openai-eval.yaml
+++ b/mcli/mcli-openai-eval.yaml
@@ -3,7 +3,7 @@ integrations:
   git_repo: mosaicml/llm-foundry
   git_branch: v0.5.0
   # git_commit:  # OR use your commit hash
-  pip_install: -e ".[gpu,openai]"
+  pip_install: -e ".[gpu-flash2,openai]"
   ssh_clone: false  # Should be true if using a private repo
 
 command: |
@@ -16,7 +16,7 @@ gpu_num:  #
 gpu_type:  #
 cluster:  # replace with your cluster here!
 
-image: mosaicml/llm-foundry:2.1.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.2.0_cu121_flash2-latest
 
 # The below is injected as a YAML file: /mnt/config/parameters.yaml
 parameters:
diff --git a/mcli/mcli-pretokenize-oci-upload.yaml b/mcli/mcli-pretokenize-oci-upload.yaml
index c9c7ad9f5e..5de4c447a6 100644
--- a/mcli/mcli-pretokenize-oci-upload.yaml
+++ b/mcli/mcli-pretokenize-oci-upload.yaml
@@ -1,5 +1,5 @@
 name: c4-2k-pre-tokenized
-image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
+image: mosaicml/llm-foundry:2.2.0_cu121_flash2-latest
 compute:
   gpus: 8  # Number of GPUs to use
 
diff --git a/setup.py b/setup.py
index 4dc771a7b4..fefa5f550a 100644
--- a/setup.py
+++ b/setup.py
@@ -55,7 +55,7 @@
     'accelerate>=0.25,<0.26',  # for HF inference `device_map`
     'transformers>=4.37,<4.38',
     'mosaicml-streaming>=0.7.4,<0.8',
-    'torch>=2.1,<2.3',
+    'torch>=2.2,<2.3',
     'datasets>=2.16,<2.17',
     'fsspec==2023.6.0',  # newer version results in a bug in datasets that duplicates data
     'sentencepiece==0.1.97',

From 78d91eb9cf5a2c46417ef412ce04712257b187a4 Mon Sep 17 00:00:00 2001
From: Daniel King <daniel@mosaicml.com>
Date: Thu, 15 Feb 2024 17:30:28 -0800
Subject: [PATCH 2/2] Point mcli configs at the 2.1.0 flash2 image

---
 mcli/mcli-1b-eval.yaml                | 2 +-
 mcli/mcli-1b-max-seq-len-8k.yaml      | 2 +-
 mcli/mcli-1b.yaml                     | 2 +-
 mcli/mcli-benchmark-mpt.yaml          | 2 +-
 mcli/mcli-convert-composer-to-hf.yaml | 2 +-
 mcli/mcli-hf-eval.yaml                | 2 +-
 mcli/mcli-hf-generate.yaml            | 2 +-
 mcli/mcli-llama2-finetune.yaml        | 2 +-
 mcli/mcli-openai-eval.yaml            | 2 +-
 mcli/mcli-pretokenize-oci-upload.yaml | 2 +-
 10 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/mcli/mcli-1b-eval.yaml b/mcli/mcli-1b-eval.yaml
index a8b5ed0112..9ae77af6ca 100644
--- a/mcli/mcli-1b-eval.yaml
+++ b/mcli/mcli-1b-eval.yaml
@@ -9,7 +9,7 @@ integrations:
 command: |
   cd llm-foundry/scripts/
   composer eval/eval.py /mnt/config/parameters.yaml
-image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
+image: mosaicml/llm-foundry:2.1.0_cu121_flash2-latest
 name: mpt-1b-eval
 
 compute:
diff --git a/mcli/mcli-1b-max-seq-len-8k.yaml b/mcli/mcli-1b-max-seq-len-8k.yaml
index 3963b4a8d4..e413c3bf81 100644
--- a/mcli/mcli-1b-max-seq-len-8k.yaml
+++ b/mcli/mcli-1b-max-seq-len-8k.yaml
@@ -17,7 +17,7 @@ command: |
     --out_root ./my-copy-c4 --splits train_small val_small \
     --concat_tokens 8192 --tokenizer EleutherAI/gpt-neox-20b --eos_text '<|endoftext|>'
   composer train/train.py /mnt/config/parameters.yaml
-image: mosaicml/llm-foundry:2.2.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.1.0_cu121_flash2-latest
 name: mpt-1b-ctx-8k-gpus-8
 
 compute:
diff --git a/mcli/mcli-1b.yaml b/mcli/mcli-1b.yaml
index 96965ddcb8..3713d29cc9 100644
--- a/mcli/mcli-1b.yaml
+++ b/mcli/mcli-1b.yaml
@@ -21,7 +21,7 @@ command: |
     eval_loader.dataset.split=val_small \
     max_duration=100ba \
     eval_interval=0
-image: mosaicml/llm-foundry:2.2.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.1.0_cu121_flash2-latest
 name: mpt-1b-gpus-8
 
 compute:
diff --git a/mcli/mcli-benchmark-mpt.yaml b/mcli/mcli-benchmark-mpt.yaml
index 104157cf27..cb8adcac00 100644
--- a/mcli/mcli-benchmark-mpt.yaml
+++ b/mcli/mcli-benchmark-mpt.yaml
@@ -6,7 +6,7 @@ compute:
   # cluster: TODO # Name of the cluster to use for this run
   # gpu_type: a100_80gb # Type of GPU to use. We use a100_80gb in our experiments
 
-image: mosaicml/llm-foundry:2.2.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.1.0_cu121_flash2-latest
 
 integrations:
 - integration_type: git_repo
diff --git a/mcli/mcli-convert-composer-to-hf.yaml b/mcli/mcli-convert-composer-to-hf.yaml
index d62648c042..8ef894bf85 100644
--- a/mcli/mcli-convert-composer-to-hf.yaml
+++ b/mcli/mcli-convert-composer-to-hf.yaml
@@ -13,7 +13,7 @@ command: |
     --hf_output_path s3://bucket/folder/hf/ \
     --output_precision bf16 \
 
-image: mosaicml/llm-foundry:2.2.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.1.0_cu121_flash2-latest
 name: convert-composer-hf
 
 compute:
diff --git a/mcli/mcli-hf-eval.yaml b/mcli/mcli-hf-eval.yaml
index 0249cd2495..6800319df2 100644
--- a/mcli/mcli-hf-eval.yaml
+++ b/mcli/mcli-hf-eval.yaml
@@ -16,7 +16,7 @@ gpu_num: 8
 # gpu_type:
 # cluster:  # replace with your cluster here!
 
-image: mosaicml/llm-foundry:2.2.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.1.0_cu121_flash2-latest
 
 # The below is injected as a YAML file: /mnt/config/parameters.yaml
 parameters:
diff --git a/mcli/mcli-hf-generate.yaml b/mcli/mcli-hf-generate.yaml
index b155d362fb..6880564a06 100644
--- a/mcli/mcli-hf-generate.yaml
+++ b/mcli/mcli-hf-generate.yaml
@@ -35,7 +35,7 @@ command: |
       "Here's a quick recipe for baking chocolate chip cookies: Start by" \
       "The best 5 cities to visit in Europe are"
 
-image: mosaicml/llm-foundry:2.2.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.1.0_cu121_flash2-latest
 name: hf-generate
 
 compute:
diff --git a/mcli/mcli-llama2-finetune.yaml b/mcli/mcli-llama2-finetune.yaml
index 763a4f8355..36de709aed 100644
--- a/mcli/mcli-llama2-finetune.yaml
+++ b/mcli/mcli-llama2-finetune.yaml
@@ -9,7 +9,7 @@ integrations:
 command: |
   cd llm-foundry/scripts
   composer train/train.py /mnt/config/parameters.yaml
-image: mosaicml/llm-foundry:2.2.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.1.0_cu121_flash2-latest
 name: llama2-finetune
 
 compute:
diff --git a/mcli/mcli-openai-eval.yaml b/mcli/mcli-openai-eval.yaml
index b2536c4a1a..38844a76cf 100644
--- a/mcli/mcli-openai-eval.yaml
+++ b/mcli/mcli-openai-eval.yaml
@@ -16,7 +16,7 @@ gpu_num:  #
 gpu_type:  #
 cluster:  # replace with your cluster here!
 
-image: mosaicml/llm-foundry:2.2.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.1.0_cu121_flash2-latest
 
 # The below is injected as a YAML file: /mnt/config/parameters.yaml
 parameters:
diff --git a/mcli/mcli-pretokenize-oci-upload.yaml b/mcli/mcli-pretokenize-oci-upload.yaml
index 5de4c447a6..4a4781cea3 100644
--- a/mcli/mcli-pretokenize-oci-upload.yaml
+++ b/mcli/mcli-pretokenize-oci-upload.yaml
@@ -1,5 +1,5 @@
 name: c4-2k-pre-tokenized
-image: mosaicml/llm-foundry:2.2.0_cu121_flash2-latest
+image: mosaicml/llm-foundry:2.1.0_cu121_flash2-latest
 compute:
   gpus: 8  # Number of GPUs to use