Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add bfloat16 support to Lightning Trainer #9049

Merged
merged 15 commits into from
Aug 24, 2021
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Added DeepSpeed Stage 1 support ([#8974](https://github.com/PyTorchLightning/pytorch-lightning/pull/8974))


- Added bfloat16 support for Lightning Trainer ([#9049](https://github.com/PyTorchLightning/pytorch-lightning/pull/9049))


### Changed

- Parsing of the `gpus` Trainer argument has changed: `gpus="n"` (str) no longer selects the GPU index n and instead selects the first n devices. ([#8770](https://github.com/PyTorchLightning/pytorch-lightning/pull/8770))
Expand Down
62 changes: 50 additions & 12 deletions pytorch_lightning/plugins/precision/native_amp.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,32 +12,60 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from contextlib import contextmanager
from typing import Any, Callable, Dict, Generator
from typing import Any, Callable, Dict, Generator, Union

import torch
from torch.optim import LBFGS, Optimizer

import pytorch_lightning as pl
from pytorch_lightning.plugins.precision.mixed import MixedPrecisionPlugin
from pytorch_lightning.utilities import _NATIVE_AMP_AVAILABLE, AMPType
from pytorch_lightning.utilities import _NATIVE_AMP_AVAILABLE, _TORCH_GREATER_EQUAL_1_10, AMPType
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.warnings import WarningCache

warning_cache = WarningCache()


class NativeMixedPrecisionPlugin(MixedPrecisionPlugin):
"""Plugin for native mixed precision training with :mod:`torch.cuda.amp`."""
"""
Plugin for native mixed precision training with :mod:`torch.cuda.amp`.

Args:
precision: Whether to use ``torch.float16`` (``16``) or ``torch.bfloat16`` (the string ``"bfloat16"``).
"""

def __init__(self) -> None:
def __init__(self, precision: Union[int, str] = 16) -> None:
super().__init__()

if not _NATIVE_AMP_AVAILABLE:
raise MisconfigurationException(
"You have asked for native AMP but your PyTorch version does not support it."
" Consider upgrading with `pip install torch>=1.6`."
)

self.fast_dtype = self._select_precision_dtype(precision)
self.backend = AMPType.NATIVE
self.scaler = torch.cuda.amp.GradScaler()
if not self.is_bfloat16:
self.scaler = torch.cuda.amp.GradScaler()

def _select_precision_dtype(self, precision: Union[int, str] = 16) -> torch.dtype:
    """Map the requested ``precision`` flag to the dtype used for autocasting.

    Args:
        precision: ``"bfloat16"`` selects ``torch.bfloat16``; every other value
            falls back to ``torch.float16``.

    Returns:
        The ``torch.dtype`` matching the requested precision.

    Raises:
        MisconfigurationException: If ``"bfloat16"`` is requested while
            ``_TORCH_GREATER_EQUAL_1_10`` is false.
    """
    # Anything that is not the explicit bfloat16 flag keeps the float16 default.
    if precision != "bfloat16":
        return torch.float16
    if _TORCH_GREATER_EQUAL_1_10:
        return torch.bfloat16
    raise MisconfigurationException(
        "To use bfloat16 with native amp you must install torch greater or equal to 1.10."
    )

@property
def is_bfloat16(self) -> bool:
    """Whether the dtype stored in ``self.fast_dtype`` is ``torch.bfloat16``."""
    selected_dtype = self.fast_dtype
    return selected_dtype == torch.bfloat16

def pre_backward(self, model: "pl.LightningModule", closure_loss: torch.Tensor) -> torch.Tensor:
    """Prepare the loss for the backward pass, scaling it unless bfloat16 is in use.

    Args:
        model: The module being trained; forwarded unchanged to the parent hook.
        closure_loss: The loss tensor produced by the closure.

    Returns:
        Whatever the parent ``pre_backward`` returns for the (possibly scaled) loss.
    """
    if not self.is_bfloat16:
        # float16 path: let the gradient scaler scale the loss first.
        closure_loss = self.scaler.scale(closure_loss)
    else:
        # bfloat16 path: the loss is handed on unscaled; tell the user via the warning cache.
        warning_cache.warn(
            "Skipping torch.cuda.amp.GradScaler in NativeMixedPrecisionPlugin as torch.bfloat16 is used."
        )
    return super().pre_backward(model, closure_loss)

Expand All @@ -49,6 +77,9 @@ def pre_optimizer_step(
lambda_closure: Callable,
**kwargs: Any,
) -> bool:
if self.is_bfloat16:
# skip scaler logic, as bfloat16 does not require scaler
return super().pre_optimizer_step(model, optimizer, optimizer_idx, lambda_closure, **kwargs)
if isinstance(optimizer, LBFGS):
raise MisconfigurationException(
f"native PyTorch amp and lbfgs are not compatible (optimizer {optimizer_idx})."
Expand All @@ -65,33 +96,40 @@ def pre_optimizer_step(
self.scaler.update()
return False

@property
def autocast(self) -> torch.cuda.amp.autocast:
if self.is_bfloat16:
return torch.cuda.amp.autocast(fast_dtype=self.fast_dtype)
return torch.cuda.amp.autocast()

@contextmanager
def train_step_context(self) -> Generator[None, None, None]:
"""Enable autocast context"""
with torch.cuda.amp.autocast():
with self.autocast:
yield

@contextmanager
def val_step_context(self) -> Generator[None, None, None]:
"""Enable autocast context"""
with torch.cuda.amp.autocast():
with self.autocast:
yield

@contextmanager
def test_step_context(self) -> Generator[None, None, None]:
"""Enable autocast context"""
with torch.cuda.amp.autocast():
with self.autocast:
yield

@contextmanager
def predict_step_context(self) -> Generator[None, None, None]:
"""Enable autocast context"""
with torch.cuda.amp.autocast():
with self.autocast:
yield

def on_load_checkpoint(self, checkpoint: Dict[str, Any]) -> None:
if "native_amp_scaling_state" in checkpoint:
if "native_amp_scaling_state" in checkpoint and not self.is_bfloat16:
self.scaler.load_state_dict(checkpoint["native_amp_scaling_state"])

def on_save_checkpoint(self, checkpoint: Dict[str, Any]) -> None:
checkpoint["native_amp_scaling_state"] = self.scaler.state_dict()
if not self.is_bfloat16:
checkpoint["native_amp_scaling_state"] = self.scaler.state_dict()
4 changes: 2 additions & 2 deletions pytorch_lightning/plugins/precision/sharded_native_amp.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
class ShardedNativeMixedPrecisionPlugin(NativeMixedPrecisionPlugin):
"""Mixed Precision for Sharded Training"""

def __init__(self) -> None:
super().__init__()
def __init__(self, precision: Union[int, str] = 16) -> None:
super().__init__(precision)
self.scaler = ShardedGradScaler()

def clip_grad_by_norm(
Expand Down
10 changes: 5 additions & 5 deletions pytorch_lightning/trainer/connectors/accelerator_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ def select_precision_plugin(self) -> PrecisionPlugin:
return PrecisionPlugin()
if self.precision == 64:
return DoublePrecisionPlugin()
if self.precision == 16:
if self.precision in (16, "bfloat16"):
if self.use_tpu:
return TPUHalfPrecisionPlugin()

Expand All @@ -581,12 +581,12 @@ def select_precision_plugin(self) -> PrecisionPlugin:
else:
raise MisconfigurationException(msg)
else:
log.info("Using native 16bit precision.")
log.info(f"Using native {self.precision}bit precision")
if self._is_sharded_training_type:
return ShardedNativeMixedPrecisionPlugin()
return ShardedNativeMixedPrecisionPlugin(self.precision)
if self._is_fully_sharded_training_type:
return FullyShardedNativeMixedPrecisionPlugin()
return NativeMixedPrecisionPlugin()
return FullyShardedNativeMixedPrecisionPlugin(self.precision)
return NativeMixedPrecisionPlugin(self.precision)

if self.amp_type == AMPType.APEX:
if not _APEX_AVAILABLE:
Expand Down
6 changes: 3 additions & 3 deletions pytorch_lightning/trainer/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def __init__(
log_every_n_steps: int = 50,
accelerator: Optional[Union[str, Accelerator]] = None,
sync_batchnorm: bool = False,
precision: int = 32,
precision: Union[int, str] = 32,
weights_summary: Optional[str] = "top",
weights_save_path: Optional[str] = None,
num_sanity_val_steps: int = 2,
Expand Down Expand Up @@ -255,8 +255,8 @@ def __init__(

plugins: Plugins allow modification of core behavior like ddp and amp, and enable custom lightning plugins.

precision: Double precision (64), full precision (32) or half precision (16). Can be used on CPU, GPU or
TPUs.
precision: Double precision (64), full precision (32), half precision (16) or bfloat16 precision ("bfloat16").
Can be used on CPU, GPU or TPUs.

max_epochs: Stop training once this number of epochs is reached. Disabled by default (None).
If both max_epochs and max_steps are not specified, defaults to ``max_epochs`` = 1000.
Expand Down
1 change: 1 addition & 0 deletions pytorch_lightning/utilities/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
_TORCH_GREATER_EQUAL_1_7,
_TORCH_GREATER_EQUAL_1_8,
_TORCH_GREATER_EQUAL_1_9,
_TORCH_GREATER_EQUAL_1_10,
_TORCH_QUANTIZE_AVAILABLE,
_TORCHTEXT_AVAILABLE,
_TORCHVISION_AVAILABLE,
Expand Down
2 changes: 2 additions & 0 deletions pytorch_lightning/utilities/imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ def _compare_version(package: str, op, version) -> bool:
_TORCH_GREATER_EQUAL_1_8 = _compare_version("torch", operator.ge, "1.8.0")
_TORCH_GREATER_EQUAL_1_8_1 = _compare_version("torch", operator.ge, "1.8.1")
_TORCH_GREATER_EQUAL_1_9 = _compare_version("torch", operator.ge, "1.9.0")
_TORCH_GREATER_EQUAL_1_10 = _compare_version("torch", operator.ge, "1.10.0dev")


_APEX_AVAILABLE = _module_available("apex.amp")
_BOLTS_AVAILABLE = _module_available("pl_bolts")
Expand Down
66 changes: 66 additions & 0 deletions tests/plugins/test_amp_plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from pytorch_lightning import Trainer
from pytorch_lightning.plugins import ApexMixedPrecisionPlugin, NativeMixedPrecisionPlugin
from pytorch_lightning.plugins.precision import MixedPrecisionPlugin
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from tests.helpers import BoringModel
from tests.helpers.runif import RunIf

Expand Down Expand Up @@ -174,3 +175,68 @@ def test_amp_apex_ddp_spawn_fit(amp_level, tmpdir):
assert isinstance(trainer.precision_plugin, ApexMixedPrecisionPlugin)
model = BoringModel()
trainer.fit(model)


@RunIf(min_gpus=1, amp_native=True)
def test_amp_precision_16_bfloat_disabled(tmpdir):
    """A Trainer built with ``precision=16`` uses the native AMP plugin without bfloat16."""
    trainer_kwargs = dict(default_root_dir=tmpdir, fast_dev_run=True, precision=16, gpus=1)
    trainer = Trainer(**trainer_kwargs)

    precision_plugin = trainer.precision_plugin
    assert isinstance(precision_plugin, NativeMixedPrecisionPlugin)
    assert not precision_plugin.is_bfloat16


@RunIf(min_gpus=1, amp_native=True, min_torch="1.10.0dev")
def test_amp_precision_bfloat(tmpdir):
    """Fit with ``precision="bfloat16"`` and check each step produces bfloat16 outputs.

    Also checks that the selected plugin reports bfloat16, that its autocast
    context carries ``torch.bfloat16``, and that fitting emits the warning about
    the gradient scaler being skipped.
    """

    class TestModel(BoringModel):
        def _checked_loss(self, batch):
            # Shared by all step hooks: forward, verify the output dtype, compute the loss.
            prediction = self(batch)
            assert prediction.dtype == torch.bfloat16
            return self.loss(batch, prediction)

        def training_step(self, batch, batch_idx):
            return {"loss": self._checked_loss(batch)}

        def validation_step(self, batch, batch_idx):
            return {"x": self._checked_loss(batch)}

        def test_step(self, batch, batch_idx):
            return {"y": self._checked_loss(batch)}

    model = TestModel()
    trainer_kwargs = dict(default_root_dir=tmpdir, fast_dev_run=True, precision="bfloat16", gpus=1)
    trainer = Trainer(**trainer_kwargs)

    precision_plugin = trainer.precision_plugin
    assert isinstance(precision_plugin, NativeMixedPrecisionPlugin)
    assert precision_plugin.is_bfloat16
    assert precision_plugin.autocast.fast_dtype == torch.bfloat16

    expected_warning = "Skipping torch.cuda.amp.GradScaler in NativeMixedPrecisionPlugin as torch.bfloat16 is used."
    with pytest.warns(UserWarning, match=expected_warning):
        trainer.fit(model)


@RunIf(min_gpus=1, amp_native=True, max_torch="1.9")
def test_amp_precision_16_bfloat_throws_error(tmpdir):
    """Constructing a Trainer with ``precision="bfloat16"`` must raise a misconfiguration error here."""
    expected_message = "To use bfloat16 with native amp you must install torch greater or equal to 1.10"
    trainer_kwargs = dict(default_root_dir=tmpdir, precision="bfloat16", gpus=1)

    with pytest.raises(MisconfigurationException, match=expected_message):
        Trainer(**trainer_kwargs)