From e15a66412cc220fa241ec7cbb64b339a2f124761 Mon Sep 17 00:00:00 2001
From: four4fish <88516121+four4fish@users.noreply.github.com>
Date: Sun, 20 Feb 2022 10:11:47 -0800
Subject: [PATCH] Add back deterministic support in accelerator_connector
 (#11999)

Co-authored-by: ananthsub
Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com>
---
 CHANGELOG.md                             |  3 +++
 .../connectors/accelerator_connector.py  | 22 ++++++++++++++++++-
 .../test_accelerator_connector.py        |  9 ++++++++
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f8bb34adeef05..c88e4fa0e9564 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -299,6 +299,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Changed default logger name to `lightning_logs` for consistency ([#11762](https://github.com/PyTorchLightning/pytorch-lightning/pull/11762))
 
+
+- Rewrote `accelerator_connector` ([#11448](https://github.com/PyTorchLightning/pytorch-lightning/pull/11448))
+
 ### Deprecated
 
 - Deprecated `training_type_plugin` property in favor of `strategy` in `Trainer` and updated the references ([#11141](https://github.com/PyTorchLightning/pytorch-lightning/pull/11141))
diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py
index 20c5f485b4e71..8d451f97249fc 100644
--- a/pytorch_lightning/trainer/connectors/accelerator_connector.py
+++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py
@@ -74,7 +74,12 @@
     rank_zero_warn,
 )
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
-from pytorch_lightning.utilities.imports import _HOROVOD_AVAILABLE, _IPU_AVAILABLE, _TPU_AVAILABLE
+from pytorch_lightning.utilities.imports import (
+    _HOROVOD_AVAILABLE,
+    _IPU_AVAILABLE,
+    _TORCH_GREATER_EQUAL_1_8,
+    _TPU_AVAILABLE,
+)
 
 log = logging.getLogger(__name__)
 
@@ -141,6 +146,7 @@ def __init__(
         torch.backends.cudnn.benchmark = benchmark
         self.replace_sampler_ddp = replace_sampler_ddp
         self.sync_batchnorm = sync_batchnorm
+        self._init_deterministic(deterministic)
 
         # 1. Parsing flags
         # Get registered strategies, built-in accelerators and precision plugins
@@ -196,6 +202,20 @@ def __init__(
         # 6. Instantiate Strategy - Part 2
         self._lazy_init_strategy()
 
+    def _init_deterministic(self, deterministic: bool) -> None:
+        self.deterministic = deterministic
+        if _TORCH_GREATER_EQUAL_1_8:
+            torch.use_deterministic_algorithms(deterministic)
+        else:
+            torch.set_deterministic(deterministic)
+        if deterministic:
+            # fixing non-deterministic part of horovod
+            # https://github.com/PyTorchLightning/pytorch-lightning/pull/1572/files#r420279383
+            os.environ["HOROVOD_FUSION_THRESHOLD"] = "0"
+
+            # https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
+            os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
+
     def _check_config_and_set_final_flags(
         self,
         strategy: Optional[Union[str, Strategy]],
diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py
index 76fa6d64f5a56..d616abd76bfa5 100644
--- a/tests/accelerators/test_accelerator_connector.py
+++ b/tests/accelerators/test_accelerator_connector.py
@@ -947,3 +947,12 @@ def test_passing_zero_and_empty_list_to_devices_flag():
 
     with pytest.warns(UserWarning, match=r"switching to `cpu` accelerator"):
         Trainer(accelerator="gpu", devices=[])
+
+
+@pytest.mark.parametrize("deterministic", [True, False])
+def test_deterministic_init(deterministic):
+    trainer = Trainer(accelerator="auto", deterministic=deterministic)
+    assert trainer._accelerator_connector.deterministic == deterministic
+    if deterministic:
+        assert os.environ.get("CUBLAS_WORKSPACE_CONFIG") == ":4096:8"
+        assert os.environ.get("HOROVOD_FUSION_THRESHOLD") == "0"
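
Note (not part of the patch): a minimal standalone sketch of the setup that
_init_deterministic wires up, assuming PyTorch >= 1.8; the helper name
init_deterministic below is illustrative only, not the library's API.

import os

import torch


def init_deterministic(deterministic: bool) -> None:
    # Globally request deterministic algorithm implementations (PyTorch >= 1.8).
    # On older versions the patch falls back to torch.set_deterministic().
    torch.use_deterministic_algorithms(deterministic)
    if deterministic:
        # A fusion threshold of 0 disables Horovod tensor fusion, whose
        # buffer packing order is otherwise non-deterministic.
        os.environ["HOROVOD_FUSION_THRESHOLD"] = "0"
        # cuBLAS needs a fixed workspace configuration for reproducible
        # results on CUDA >= 10.2.
        os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"


# Roughly the effect that Trainer(deterministic=True) now has at construction time.
init_deterministic(True)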