Commit

abstract out optimizer selection to a deepspeed util
eljandoubi committed Nov 25, 2024
1 parent 92a2e1f commit 1cdbb40
Showing 3 changed files with 50 additions and 38 deletions.
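In short, the inline optimizer-class selection in `Accelerator._prepare_deepspeed` is moved into a reusable helper, `map_pytorch_optim_to_deepspeed`, defined in `src/accelerate/utils/deepspeed.py` and re-exported from `accelerate.utils` (both shown in the diff below). A minimal sketch of how the helper behaves, assuming `deepspeed` is installed; the parameter setup is purely illustrative:

```python
import torch

from accelerate.utils import map_pytorch_optim_to_deepspeed

# Hypothetical parameters, only so the optimizers can be constructed.
params = [torch.nn.Parameter(torch.zeros(4))]

# torch.optim.Adagrad maps to deepspeed.ops.adagrad.DeepSpeedCPUAdagrad (with deepspeed >= 0.5.5) ...
cpu_adagrad = map_pytorch_optim_to_deepspeed(torch.optim.Adagrad(params, lr=1e-2))

# ... while any other optimizer falls back to deepspeed.ops.adam.DeepSpeedCPUAdam,
# carrying over only the original `lr` and `weight_decay` defaults.
cpu_adam = map_pytorch_optim_to_deepspeed(torch.optim.SGD(params, lr=1e-2))
```

Factoring the selection out keeps `_prepare_deepspeed` a thin call site and makes the mapping logic reusable outside the accelerator.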
40 changes: 2 additions & 38 deletions src/accelerate/accelerator.py
@@ -30,7 +30,6 @@
from typing import Any, Callable, Union

import torch
import torch.optim.adagrad
import torch.utils.hooks as hooks
from huggingface_hub import split_torch_state_dict_into_shards

@@ -84,7 +83,6 @@
has_offloaded_params,
is_bf16_available,
is_bitsandbytes_multi_backend_available,
is_bnb_available,
is_deepspeed_available,
is_ipex_available,
is_lomo_available,
@@ -121,6 +119,7 @@
DeepSpeedSchedulerWrapper,
DummyOptim,
DummyScheduler,
map_pytorch_optim_to_deepspeed,
)

if is_megatron_lm_available():
@@ -1841,42 +1840,7 @@ def _prepare_deepspeed(self, *args):
                    if self.deepspeed_config["zero_optimization"].get("offload_optimizer", {}).get(
                        "device", "none"
                    ) != "none" and self.deepspeed_config.get("zero_force_ds_cpu_optimizer", True):
                        defaults = {k: v for k, v in optimizer.defaults.items() if k in ["lr", "weight_decay"]}

                        # Select the DeepSpeedCPUOptimizer based on the original optimizer class.
                        # DeepSpeedCPUAdam is the default
                        from deepspeed.ops.adam import DeepSpeedCPUAdam

                        optimizer_class = DeepSpeedCPUAdam

                        # For DeepSpeedCPUAdagrad
                        if compare_versions("deepspeed", ">=", "0.5.5"):
                            # Check if the optimizer is PyTorch's Adagrad.
                            is_ada = isinstance(optimizer, torch.optim.Adagrad)
                            # If not, and bitsandbytes is available,
                            # # check if the optimizer is the 32-bit bitsandbytes Adagrad.
                            if is_bnb_available() and not is_ada:
                                import bitsandbytes.optim as bnb_opt

                                is_ada = (
                                    isinstance(optimizer, (bnb_opt.Adagrad, bnb_opt.Adagrad32bit))
                                    and optimizer.optim_bits == 32
                                )
                            if is_ada:
                                from deepspeed.ops.adagrad import DeepSpeedCPUAdagrad

                                optimizer_class = DeepSpeedCPUAdagrad

                        # For DeepSpeedCPULion
                        if is_bnb_available(min_version="0.38.0") and compare_versions("deepspeed", ">=", "0.11.0"):
                            from bitsandbytes.optim import Lion, Lion32bit

                            if isinstance(optimizer, (Lion, Lion32bit)) and optimizer.optim_bits == 32:
                                from deepspeed.ops.lion import DeepSpeedCPULion

                                optimizer_class = DeepSpeedCPULion

                        optimizer = optimizer_class(optimizer.param_groups, **defaults)
                        optimizer = map_pytorch_optim_to_deepspeed(optimizer)
                    kwargs["optimizer"] = optimizer
                    if scheduler is not None:
                        if type(scheduler).__name__ in deepspeed.runtime.lr_schedules.VALID_LR_SCHEDULES:
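For reference, a hedged sketch of the DeepSpeed config fragment that makes `_prepare_deepspeed` take the branch above; the values are illustrative and not part of this commit:

```python
# CPU-offloading the optimizer while zero_force_ds_cpu_optimizer keeps its default (True)
# is what routes the user-supplied torch optimizer through map_pytorch_optim_to_deepspeed.
ds_config = {
    "zero_optimization": {
        "stage": 2,
        "offload_optimizer": {"device": "cpu"},  # any device other than "none"
    },
    # Defaults to True when the key is absent, per the .get(..., True) in the condition above.
    "zero_force_ds_cpu_optimizer": True,
}
```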
1 change: 1 addition & 0 deletions src/accelerate/utils/__init__.py
@@ -199,6 +199,7 @@
DummyScheduler,
HfDeepSpeedConfig,
get_active_deepspeed_plugin,
map_pytorch_optim_to_deepspeed,
)

from .bnb import has_4bit_bnb_layers, load_and_quantize_model
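Since the helper is re-exported at the package level (the one-line addition above), both import paths refer to the same function; a trivial check, assuming an accelerate build that includes this commit:

```python
from accelerate.utils import map_pytorch_optim_to_deepspeed
from accelerate.utils.deepspeed import map_pytorch_optim_to_deepspeed as _impl

assert map_pytorch_optim_to_deepspeed is _impl
```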
47 changes: 47 additions & 0 deletions src/accelerate/utils/deepspeed.py
@@ -17,9 +17,56 @@
import os
from copy import deepcopy

from torch import optim

from ..optimizer import AcceleratedOptimizer
from ..scheduler import AcceleratedScheduler
from .dataclasses import DistributedType
from .imports import is_bnb_available
from .versions import compare_versions


def map_pytorch_optim_to_deepspeed(optimizer):
"""
Args:
optimizer: torch.optim.Optimizer
Returns the DeepSeedCPUOptimizer (deepspeed.ops) version of the optimizer.
"""

defaults = {k: v for k, v in optimizer.defaults.items() if k in ["lr", "weight_decay"]}

# Select the DeepSpeedCPUOptimizer based on the original optimizer class.
# DeepSpeedCPUAdam is the default
from deepspeed.ops.adam import DeepSpeedCPUAdam

optimizer_class = DeepSpeedCPUAdam

# For DeepSpeedCPUAdagrad
if compare_versions("deepspeed", ">=", "0.5.5"):
# Check if the optimizer is PyTorch's Adagrad.
is_ada = isinstance(optimizer, optim.Adagrad)
# If not, and bitsandbytes is available,
# # check if the optimizer is the 32-bit bitsandbytes Adagrad.
if is_bnb_available() and not is_ada:
import bitsandbytes.optim as bnb_opt

is_ada = isinstance(optimizer, (bnb_opt.Adagrad, bnb_opt.Adagrad32bit)) and optimizer.optim_bits == 32
if is_ada:
from deepspeed.ops.adagrad import DeepSpeedCPUAdagrad

optimizer_class = DeepSpeedCPUAdagrad

# For DeepSpeedCPULion
if is_bnb_available(min_version="0.38.0") and compare_versions("deepspeed", ">=", "0.11.0"):
from bitsandbytes.optim import Lion, Lion32bit

if isinstance(optimizer, (Lion, Lion32bit)) and optimizer.optim_bits == 32:
from deepspeed.ops.lion import DeepSpeedCPULion

optimizer_class = DeepSpeedCPULion

return optimizer_class(optimizer.param_groups, **defaults)


def get_active_deepspeed_plugin(state):
