Commit

merge
dakinggg committed May 3, 2024
2 parents 6c35b59 + ddf4aa4, commit 9f43b86
Showing 173 changed files with 12,445 additions and 7,069 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/docker.yaml
@@ -17,11 +17,11 @@ jobs:
   strategy:
     matrix:
       include:
-      - name: "2.2.1_cu121_flash2"
-        base_image: mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04
+      - name: "2.3.0_cu121_flash2"
+        base_image: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04
         dep_groups: "[gpu-flash2]"
-      - name: "2.2.1_cu121_flash2_aws"
-        base_image: mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04-aws
+      - name: "2.3.0_cu121_flash2_aws"
+        base_image: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04-aws
         dep_groups: "[gpu-flash2]"
   steps:
   - name: Maximize Build Space on Worker
4 changes: 2 additions & 2 deletions .github/workflows/pr-cpu.yaml
@@ -19,8 +19,8 @@ jobs:
   strategy:
     matrix:
       include:
-      - name: "cpu-2.2.1"
-        container: mosaicml/pytorch:2.2.1_cpu-python3.11-ubuntu20.04
+      - name: "cpu-2.3.0"
+        container: mosaicml/pytorch:2.3.0_cpu-python3.11-ubuntu20.04
         markers: "not gpu"
         pytest_command: "coverage run -m pytest"
     name: ${{ matrix.name }}
8 changes: 4 additions & 4 deletions .github/workflows/pr-gpu.yaml
@@ -9,21 +9,21 @@ on:
     - main
     - release/**
   workflow_dispatch:
 # Cancel old runs when a new commit is pushed to the same branch if not on main or dev
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
   cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
 jobs:
   pytest-gpu:
     uses: mosaicml/ci-testing/.github/workflows/[email protected]
     strategy:
       fail-fast: false
       matrix:
         include:
-        - name: "gpu-2.2.1"
-          container: mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04
+        - name: "gpu-2.3.0"
+          container: mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04
           markers: "gpu"
-          pip_deps: "[all]"
           pytest_command: "coverage run -m pytest"
+          pip_deps: "[all]"
     name: ${{ matrix.name }}
     if: github.repository_owner == 'mosaicml'
     with:
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
@@ -1,6 +1,12 @@
 default_language_version:
   python: python3
 repos:
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  # Ruff version.
+  rev: v0.2.2
+  hooks:
+  - id: ruff
+    args: [--fix, --exit-non-zero-on-fix]
 - repo: https://github.com/google/yapf
   rev: v0.32.0
   hooks:
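The new hook runs `ruff` with `--fix`, so simple lint violations are corrected in place rather than just reported, and `--exit-non-zero-on-fix` fails the hook whenever it had to change something, surfacing the automatic edit to the committer. As a hypothetical illustration (not from this commit) of the kind of fix it applies:

```python
# Before the hook runs, `ruff check` flags the unused import (rule F401):
import os  # F401: `os` imported but unused
import sys

print(sys.argv)
```

Running `ruff check --fix` on this file deletes the `import os` line automatically; with `--exit-non-zero-on-fix`, the pre-commit hook then exits non-zero so the change does not slip through silently.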
12 changes: 6 additions & 6 deletions README.md
@@ -113,24 +113,24 @@ If you have success/failure using LLM Foundry on other systems, please let us kn

 | Device         | Torch Version | Cuda Version | Status                       |
 | -------------- | ------------- | ------------ | ---------------------------- |
-| A100-40GB/80GB | 2.2.1         | 12.1         | :white_check_mark: Supported |
-| H100-80GB      | 2.2.1         | 12.1         | :white_check_mark: Supported |
+| A100-40GB/80GB | 2.3.0         | 12.1         | :white_check_mark: Supported |
+| H100-80GB      | 2.3.0         | 12.1         | :white_check_mark: Supported |
 
 ## MosaicML Docker Images
 We highly recommend using our prebuilt Docker images. You can find them here: https://hub.docker.com/orgs/mosaicml/repositories.
 
 The `mosaicml/pytorch` images are pinned to specific PyTorch and CUDA versions, and are stable and rarely updated.
 
 The `mosaicml/llm-foundry` images are built with new tags upon every commit to the `main` branch.
-You can select a specific commit hash such as `mosaicml/llm-foundry:2.2.1_cu121_flash2-36ab1ba` or take the latest one using `mosaicml/llm-foundry:2.2.1_cu121_flash2-latest`.
+You can select a specific commit hash such as `mosaicml/llm-foundry:2.3.0_cu121_flash2-36ab1ba` or take the latest one using `mosaicml/llm-foundry:2.3.0_cu121_flash2-latest`.
 
 **Please Note:** The `mosaicml/llm-foundry` images do not come with the `llm-foundry` package preinstalled, just the dependencies. You will still need to `pip install llm-foundry` either from PyPi or from source.
 
 | Docker Image                                           | Torch Version | Cuda Version      | LLM Foundry dependencies installed? |
 | ------------------------------------------------------ | ------------- | ----------------- | ----------------------------------- |
-| `mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04`  | 2.2.1         | 12.1 (Infiniband) | No                                  |
-| `mosaicml/llm-foundry:2.2.1_cu121_flash2-latest`       | 2.2.1         | 12.1 (Infiniband) | Yes                                 |
-| `mosaicml/llm-foundry:2.2.1_cu121_flash2_aws-latest`   | 2.2.1         | 12.1 (EFA)        | Yes                                 |
+| `mosaicml/pytorch:2.3.0_cu121-python3.11-ubuntu20.04`  | 2.3.0         | 12.1 (Infiniband) | No                                  |
+| `mosaicml/llm-foundry:2.3.0_cu121_flash2-latest`       | 2.3.0         | 12.1 (Infiniband) | Yes                                 |
+| `mosaicml/llm-foundry:2.3.0_cu121_flash2_aws-latest`   | 2.3.0         | 12.1 (EFA)        | Yes                                 |
 
 
 # Installation
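A quick way to confirm that a pulled image matches the support matrix above is to check the bundled PyTorch build from inside the container; a minimal sketch:

```python
# Run inside one of the mosaicml/pytorch:2.3.0_cu121 containers to verify
# the pinned versions from the README table.
import torch

print(torch.__version__)          # expected: 2.3.0 (possibly with a build suffix)
print(torch.version.cuda)         # expected: 12.1
print(torch.cuda.is_available())  # True on a machine with a visible GPU
```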
33 changes: 25 additions & 8 deletions llmfoundry/__init__.py
@@ -12,20 +12,37 @@
 from llmfoundry.utils.logging_utils import SpecificWarningFilter
 
 # Filter out Hugging Face warning for not using a pinned revision of the model
-hf_dynamic_modules_logger = logging.getLogger(
-    'transformers.dynamic_module_utils')
+logger = logging.getLogger('transformers.dynamic_module_utils')
 new_files_warning_filter = SpecificWarningFilter(
-    'A new version of the following files was downloaded from')
+    'A new version of the following files was downloaded from',
+)
 
-hf_dynamic_modules_logger.addFilter(new_files_warning_filter)
+logger.addFilter(new_files_warning_filter)
 
-from llmfoundry import (algorithms, callbacks, cli, data, eval, interfaces,
-                        loggers, metrics, models, optim, tokenizers, utils)
+from llmfoundry import (
+    algorithms,
+    callbacks,
+    cli,
+    data,
+    eval,
+    interfaces,
+    loggers,
+    metrics,
+    models,
+    optim,
+    tokenizers,
+    utils,
+)
 from llmfoundry.data import StreamingFinetuningDataset, StreamingTextDataset
 from llmfoundry.eval import InContextLearningDataset, InContextLearningMetric
 from llmfoundry.models.hf import ComposerHFCausalLM
-from llmfoundry.models.mpt import (ComposerMPTCausalLM, MPTConfig,
-                                   MPTForCausalLM, MPTModel, MPTPreTrainedModel)
+from llmfoundry.models.mpt import (
+    ComposerMPTCausalLM,
+    MPTConfig,
+    MPTForCausalLM,
+    MPTModel,
+    MPTPreTrainedModel,
+)
 from llmfoundry.optim import DecoupledLionW
 
 __all__ = [
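The filter attached in this hunk is a standard `logging.Filter`; a minimal sketch of the idea, assuming `SpecificWarningFilter` simply drops records whose message starts with the given prefix (the real implementation lives in `llmfoundry.utils.logging_utils` and may differ):

```python
import logging


class PrefixWarningFilter(logging.Filter):
    """Drop log records whose message starts with a given prefix.

    Hypothetical stand-in for llmfoundry.utils.logging_utils.SpecificWarningFilter.
    """

    def __init__(self, prefix: str):
        super().__init__()
        self.prefix = prefix

    def filter(self, record: logging.LogRecord) -> bool:
        # Returning False suppresses the record; True lets it through.
        return not record.getMessage().startswith(self.prefix)


logger = logging.getLogger('transformers.dynamic_module_utils')
logger.addFilter(
    PrefixWarningFilter('A new version of the following files was downloaded from'),
)
```

Attaching the filter to `transformers.dynamic_module_utils` silences only the unpinned-revision download warning while leaving every other message from that logger intact.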
8 changes: 6 additions & 2 deletions llmfoundry/algorithms/__init__.py
@@ -1,8 +1,12 @@
 # Copyright 2024 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
 
-from composer.algorithms import (Alibi, GatedLinearUnits, GradientClipping,
-                                 LowPrecisionLayerNorm)
+from composer.algorithms import (
+    Alibi,
+    GatedLinearUnits,
+    GradientClipping,
+    LowPrecisionLayerNorm,
+)
 
 from llmfoundry.registry import algorithms
 
22 changes: 16 additions & 6 deletions llmfoundry/callbacks/__init__.py
@@ -1,22 +1,32 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
 
-from composer.callbacks import (EarlyStopper, EvalOutputLogging, Generate,
-                                LRMonitor, MemoryMonitor, MemorySnapshot,
-                                OOMObserver, OptimizerMonitor, RuntimeEstimator,
-                                SpeedMonitor)
+from composer.callbacks import (
+    EarlyStopper,
+    Generate,
+    LRMonitor,
+    MemoryMonitor,
+    MemorySnapshot,
+    OOMObserver,
+    OptimizerMonitor,
+    RuntimeEstimator,
+    SpeedMonitor,
+)
 
 from llmfoundry.callbacks.async_eval_callback import AsyncEval
 from llmfoundry.callbacks.curriculum_learning_callback import CurriculumLearning
 from llmfoundry.callbacks.eval_gauntlet_callback import EvalGauntlet
+from llmfoundry.callbacks.eval_output_logging_callback import EvalOutputLogging
 from llmfoundry.callbacks.fdiff_callback import FDiffMetrics
 from llmfoundry.callbacks.hf_checkpointer import HuggingFaceCheckpointer
 from llmfoundry.callbacks.log_mbmoe_tok_per_expert_callback import \
     MegaBlocksMoE_TokPerExpert
 from llmfoundry.callbacks.monolithic_ckpt_callback import \
     MonolithicCheckpointSaver
-from llmfoundry.callbacks.resumption_callbacks import (GlobalLRScaling,
-                                                       LayerFreezing)
+from llmfoundry.callbacks.resumption_callbacks import (
+    GlobalLRScaling,
+    LayerFreezing,
+)
 from llmfoundry.callbacks.scheduled_gc_callback import ScheduledGarbageCollector
 from llmfoundry.registry import callbacks, callbacks_with_config
 
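Below these imports (truncated in this view), the module registers the imported callbacks with the registry objects from the last line, so YAML configs can refer to them by name. A sketch of the pattern, assuming the `register(name, func=...)` signature used by llm-foundry's registries; the exact names registered in this commit are not shown in the truncated diff:

```python
from composer.callbacks import LRMonitor, MemoryMonitor

from llmfoundry.registry import callbacks

# Hypothetical registrations mirroring the module's pattern: a string key
# maps to the callback class, which the trainer later looks up by name.
callbacks.register('lr_monitor', func=LRMonitor)
callbacks.register('memory_monitor', func=MemoryMonitor)
```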