diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index b2fa8a5a5..64020f34a 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -5,7 +5,7 @@ version: 2
 updates:
-  # Maintain dependencies for GitHub Actions
+  # Maintain dependencies for GitHub Actions.
   - package-ecosystem: "github-actions"
     directory: "/"
     schedule:
@@ -14,7 +14,7 @@ updates:
       - dependency-name: "*"
         update-types: ["version-update:semver-patch"]
-  # Maintain dependencies for npm
+  # Maintain dependencies for npm.
   - package-ecosystem: "npm"
     directory: "/"
     schedule:
@@ -22,7 +22,7 @@ updates:
     ignore:
       - dependency-name: "*"
         update-types: ["version-update:semver-patch"]
-  # Maintain dependencies for npm
+  # Maintain dependencies for pip.
   - package-ecosystem: "pip"
     directory: "/"
     schedule:
diff --git a/.github/workflows/stable_learning_control.yml b/.github/workflows/stable_learning_control.yml
index 370f3a538..0418918eb 100644
--- a/.github/workflows/stable_learning_control.yml
+++ b/.github/workflows/stable_learning_control.yml
@@ -7,7 +7,7 @@ on:
     tags-ignore:
       - v*.*.*
 jobs:
-  markdown-lint: # Lints the markdown code
+  markdown-lint: # Lints the markdown code.
     name: runner / remark-lint
     runs-on: ubuntu-latest
     steps:
@@ -18,7 +18,7 @@ jobs:
           github_token: ${{ secrets.GITHUB_TOKEN }}
           reporter: github-check
           level: warning
-  black: # Check python code format
+  black: # Check python code format.
     name: runner / black
     runs-on: ubuntu-latest
     steps:
@@ -28,7 +28,7 @@ jobs:
           github_token: ${{ secrets.GITHUB_TOKEN }}
           reporter: github-check
           level: warning
-  flake8: # Lints python code
+  flake8: # Lints python code.
     name: runner / flake8
     runs-on: ubuntu-latest
     steps:
@@ -54,9 +54,9 @@ jobs:
     name: python-tests (Testing)
     runs-on: ubuntu-latest
     strategy:
-      fail-fast: false # Run all matrix jobs
+      fail-fast: false # Run all matrix jobs.
       matrix:
-        python-version: [3.8, 3.9, "3.10"] # Supported python versions
+        python-version: [3.8, 3.9, "3.10"] # Supported python versions.
     steps:
       - name: Checkout stable-learning-control repository
         uses: actions/checkout@v3
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 5f3f29c93..2b1ddf8a1 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -15,7 +15,7 @@ jobs:
           level: warning
           exclude: |
             ./CHANGELOG.md
-  alex: # Checks docs for inconsiderate writing
+  alex: # Checks docs for inconsiderate writing.
     name: runner / alex
     runs-on: ubuntu-latest
     steps:
@@ -25,7 +25,7 @@ jobs:
           github_token: ${{ secrets.GITHUB_TOKEN }}
           reporter: github-check
           level: warning
-  markdown-lint: # Lints the markdown code
+  markdown-lint: # Lints the markdown code.
     name: runner / remark-lint
     runs-on: ubuntu-latest
     steps:
@@ -36,7 +36,7 @@ jobs:
           github_token: ${{ secrets.GITHUB_TOKEN }}
           reporter: github-check
           level: warning
-  black: # Check python code format
+  black: # Check python code format.
     name: runner / black
     runs-on: ubuntu-latest
     steps:
@@ -46,7 +46,7 @@ jobs:
           github_token: ${{ secrets.GITHUB_TOKEN }}
           reporter: github-check
           level: warning
-  flake8: # Lints python code
+  flake8: # Lints python code.
     name: runner / flake8
     runs-on: ubuntu-latest
     steps:
@@ -72,9 +72,9 @@ jobs:
     name: python-tests (Testing)
     runs-on: ubuntu-latest
     strategy:
-      fail-fast: false # Run all matrix jobs
+      fail-fast: false # Run all matrix jobs.
       matrix:
-        python-version: [3.8, 3.9, "3.10"] # Supported python versions
+        python-version: [3.8, 3.9, "3.10"] # Supported python versions.
steps: - name: Checkout stable-learning-control repository uses: actions/checkout@v3 diff --git a/docs/Makefile b/docs/Makefile index 8ff39e1b6..c760baea6 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -1,6 +1,6 @@ # Makefile for Stable Learning Control Sphinx documentation -# You can set these variables from the command line. +# You can set these variables from the command line SPHINXOPTS = SPHINXBUILD = sphinx-build SPHINXPROJ = Stable Learning Control diff --git a/docs/source/conf.py b/docs/source/conf.py index 059a95a40..677176391 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -68,12 +68,12 @@ def __getattr__(cls, name): "myst_parser", ] -# Extension settings +# Extension settings. autosummary_generate = True autosummary_generate_overwrite = True autodoc_member_order = "bysource" -# imgmath settings +# imgmath settings. imgmath_image_format = "svg" imgmath_font_size = 14 @@ -110,7 +110,7 @@ def __getattr__(cls, name): # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -# This patterns also effect to html_static_path and html_extra_path +# This patterns also effect to html_static_path and html_extra_path. exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "TODO.*", "README.*"] # The name of the Pygments (syntax highlighting) style to use. diff --git a/docs/source/control/eval_robustness.rst b/docs/source/control/eval_robustness.rst index dfcb865ce..8e2cd28ab 100644 --- a/docs/source/control/eval_robustness.rst +++ b/docs/source/control/eval_robustness.rst @@ -99,15 +99,15 @@ under several Impulse disturbances. if __name__ == "__main__": - # Parse input arguments + # Parse input arguments. parser = argparse.ArgumentParser() parser.add_argument("fpath", type=str, help="The path where the policy is stored") args = parser.parse_args() - # Retrieve dataframe + # Retrieve dataframe. robustness_eval_df = pd.read_csv(Path(args.fpath).absolute()) - # Retrieve observation and reference data from the dataframe + # Retrieve observation and reference data from the dataframe. o_disturbances_df = robustness_eval_df.query("variable == 'observation'").dropna( axis=1, how="all" ) @@ -115,7 +115,7 @@ under several Impulse disturbances. axis=1, how="all" ) - # Merge observations and references into one dataframe + # Merge observations and references into one dataframe. obs_df_tmp = o_disturbances_df.query("observation == 3") obs_df_tmp["signal"] = "obs_" + (obs_df_tmp["observation"] + 1).astype(str) obs_df_tmp.insert(len(obs_df_tmp.columns), "type", "observation") @@ -190,14 +190,14 @@ class to add all the required methods and attributes to make it compatible with import numpy as np from stable_learning_control.simzoo.simzoo.common.disturber import Disturber - # Disturber config used to overwrite the default config + # Disturber config used to overwrite the default config. DISTURBER_CFG = { - # Disturbance applied to environment variables + # Disturbance applied to environment variables. "env": { "description": "Pole length disturbance", - # The env variable which you want to disturb + # The env variable which you want to disturb. "variable": "length", - # The range of values you want to use for each disturbance iteration + # The range of values you want to use for each disturbance iteration. "variable_range": np.linspace(0.5, 2.0, num=5, dtype=np.float32), # Label used in robustness plots. 
"label": "r: %s", @@ -271,10 +271,10 @@ When editing the ``DISTURBANCE_CFG`` config in the :class:`~stable_learning_cont :linenos: :emphasize-lines: 5 - # A random noise that is applied at every timestep + # A random noise that is applied at every timestep. "noise": { "description": "Random noise disturbance", - # The means and standards deviations of the random noise disturbance + # The means and standards deviations of the random noise disturbance. "noise_range": { "mean": np.linspace(80, 155, num=3, dtype=np.int16), "std": np.linspace(1.0, 5.0, num=3, dtype=np.int16), @@ -292,14 +292,14 @@ When editing the ``DISTURBANCE_CFG`` config in the :class:`~stable_learning_cont # Disturbance applied to the *OUTPUT* of the environment step function "output": { - # The disturbance variant used when no variant is given + # The disturbance variant used when no variant is given. "default_variant": "impulse", - # A random noise that is applied at every timestep + # A random noise that is applied at every timestep. "noise": { "description": "Random noise disturbance", - # The means and standards deviations of the random noise disturbance + # The means and standards deviations of the random noise disturbance. "noise_range": { - # "mean": np.linspace(80, 155, num=3, dtype=np.int16), # All obs + # "mean": np.linspace(80, 155, num=3, dtype=np.int16), # All obs. "mean": np.vstack( ( np.linspace(80, 155, num=3, dtype=np.int16), # Obs 1 @@ -308,7 +308,7 @@ When editing the ``DISTURBANCE_CFG`` config in the :class:`~stable_learning_cont np.linspace(80, 155, num=3, dtype=np.int16), # Obs 4 ) ).T, - # "std": np.linspace(1.0, 5.0, num=3, dtype=np.int16), # All Obs + # "std": np.linspace(1.0, 5.0, num=3, dtype=np.int16), # All Obs. "std": np.vstack( ( np.linspace(1.0, 5.0, num=3, dtype=np.int16), # Obs 1 @@ -332,12 +332,12 @@ When editing the ``DISTURBANCE_CFG`` config in the :class:`~stable_learning_cont :linenos: :emphasize-lines: 4, 12 - # Input and output noise disturbance + # Input and output noise disturbance. "noise": { "description": "Random input and output noise disturbance", "input_noise": { # The means and standards deviations of the random input noise - # disturbance + # disturbance. "noise_range": { "mean": np.linspace(80, 155, num=3, dtype=np.int16), "std": np.linspace(1.0, 5.0, num=3, dtype=np.int16), @@ -345,7 +345,7 @@ When editing the ``DISTURBANCE_CFG`` config in the :class:`~stable_learning_cont }, "output_noise": { # The means and standards deviations of the random output noise - # disturbance + # disturbance. "noise_range": { "mean": np.linspace(80, 155, num=3, dtype=np.int16), "std": np.linspace(1.0, 5.0, num=3, dtype=np.int16), diff --git a/docs/source/control/saving_and_loading.rst b/docs/source/control/saving_and_loading.rst index 0bf47d98c..71a929f0d 100644 --- a/docs/source/control/saving_and_loading.rst +++ b/docs/source/control/saving_and_loading.rst @@ -189,10 +189,10 @@ the :torch:`PyTorch documentation = 0 else "last" @@ -134,17 +134,17 @@ def noise_disturbance(mean, std): ) sys.exit(0) - # Remove action clipping if present + # Remove action clipping if present. if hasattr(env.unwrapped, "_clipped_action"): env.unwrapped._clipped_action = False - # Setup logger + # Setup logger. output_dir = Path(args.fpath).joinpath("eval") logger = EpochLogger( verbose_fmt="table", output_dir=output_dir, output_fname="eval_statistics.csv" ) - # Set max episode length + # Set max episode length. 
if args.len is None: max_ep_len = env._max_episode_steps else: @@ -181,7 +181,7 @@ def noise_disturbance(mean, std): n_disturbance = 0 disturbances_length = len(disturbance_range["mean"]) soi_found, ref_found = True, True - supports_deterministic = True # Only supported with gaussian algorithms + supports_deterministic = True # Only supported with gaussian algorithms. log_to_std_out("Adding random observation noise.", type="info") for _ in range(0, disturbances_length): o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0 @@ -199,9 +199,9 @@ def noise_disturbance(mean, std): f"Disturbance {n_disturbance}: mean: {mean}, std: {std}", type="info" ) - # Perform disturbed episodes + # Perform disturbed episodes. while n < args.episodes: - # Render env if requested + # Render env if requested. if args.render and not render_error: try: env.render() @@ -217,7 +217,7 @@ def noise_disturbance(mean, std): type="warning", ) - # Retrieve action + # Retrieve action. if args.deterministic and supports_deterministic: try: a = policy.get_action(o, deterministic=args.deterministic) @@ -243,7 +243,7 @@ def noise_disturbance(mean, std): ) # NOTE: In this example we add a small random noise to the action o, r, d, info = env.step(a) - # Increment counters + # Increment counters. ep_ret += r ep_len += 1 ################################################ @@ -275,7 +275,7 @@ def noise_disturbance(mean, std): type="warning", ) - # Store performance measurements + # Store performance measurements. if d or (ep_len == max_ep_len): died = ep_len < max_ep_len logger.store(EpRet=ep_ret, EpLen=ep_len, DeathRate=(float(died))) @@ -284,24 +284,24 @@ def noise_disturbance(mean, std): % (n, ep_ret, ep_len, died) ) - # Store observations + # Store observations. o_episode_df = pd.DataFrame(path["o"]) o_episode_df.insert(0, "step", range(0, ep_len)) o_episode_df = pd.melt( o_episode_df, id_vars="step", var_name="observation", - ) # Flatten robustness_eval_df + ) # Flatten robustness_eval_df. o_episodes_dfs.append(o_episode_df) - # Store episode rewards + # Store episode rewards. r_episode_df = pd.DataFrame( {"step": range(0, ep_len), "reward": path["r"]} ) r_episode_df.insert(len(r_episode_df.columns), "episode", n) r_episodes_dfs.append(r_episode_df) - # Store states of interest + # Store states of interest. if soi_found: soi_episode_df = pd.DataFrame(path["state_of_interest"]) soi_episode_df.insert(0, "step", range(0, ep_len)) @@ -310,10 +310,10 @@ def noise_disturbance(mean, std): id_vars="step", var_name="state_of_interest", value_name="error", - ) # Flatten robustness_eval_df + ) # Flatten robustness_eval_df. soi_episodes_dfs.append(soi_episode_df) - # Store reference + # Store reference. if ref_found: ref_episode_df = pd.DataFrame(path["reference"]) ref_episode_df.insert(0, "step", range(0, ep_len)) @@ -321,10 +321,10 @@ def noise_disturbance(mean, std): ref_episode_df, id_vars="step", var_name="reference", - ) # Flatten robustness_eval_df + ) # Flatten robustness_eval_df. ref_episodes_dfs.append(ref_episode_df) - # Increment counters and reset storage variables + # Increment counters and reset storage variables. n += 1 o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0 path = { @@ -334,14 +334,14 @@ def noise_disturbance(mean, std): "state_of_interest": [], } - # Print robustness evaluation diagnostics + # Print robustness evaluation diagnostics. 
logger.log_tabular("EpRet", with_min_and_max=True) logger.log_tabular("EpLen", average_only=True) logger.log_tabular("DeathRate") log_to_std_out("") logger.dump_tabular() - # Add extra disturbance information to the robustness eval robustness_eval_df + # Add extra disturbance information to the robustness eval robustness_eval_df. disturbance_label = ( env.disturbance_info["label"] if ( @@ -383,7 +383,7 @@ def noise_disturbance(mean, std): ) ref_disturbances_dfs.append(ref_disturbance_df) - # Reset storage buckets and go to next disturbance + # Reset storage buckets and go to next disturbance. o_episodes_dfs = [] r_episodes_dfs = [] soi_episodes_dfs = [] @@ -396,7 +396,7 @@ def noise_disturbance(mean, std): n_disturbance += 1 ################################################ - # Merge robustness evaluation information for all disturbances + # Merge robustness evaluation information for all disturbances. o_disturbances_df = pd.concat(o_disturbances_dfs, ignore_index=True) r_disturbances_df = pd.concat(r_disturbances_dfs, ignore_index=True) soi_disturbances_df = pd.concat(soi_disturbances_dfs, ignore_index=True) @@ -431,7 +431,7 @@ def noise_disturbance(mean, std): disturbance_variant, ) - # Save robustness evaluation robustness_eval_df and return it to the user + # Save robustness evaluation robustness_eval_df and return it to the user. if args.save_result: results_path = logger.output_dir.joinpath("results.csv") logger.log( @@ -450,7 +450,7 @@ def noise_disturbance(mean, std): log_to_std_out("Showing robustness evaluation plots...", type="info") sns.set(style="darkgrid", font_scale=args.font_scale) - # Unpack required data from robustness_eval_df + # Unpack required data from robustness_eval_df. obs_found, rew_found, soi_found, ref_found = True, True, True, True o_disturbances_df, ref_disturbances_df = ( pd.DataFrame(), @@ -481,13 +481,13 @@ def noise_disturbance(mean, std): else: ref_found = False - # Merge observations and references + # Merge observations and references. if obs_found: obs_df_tmp = o_disturbances_df.copy(deep=True) obs_df_tmp["signal"] = "obs_" + (obs_df_tmp["observation"] + 1).astype(str) obs_df_tmp.insert(len(obs_df_tmp.columns), "type", "observation") - # Retrieve the requested observations + # Retrieve the requested observations. observations = args.observations if hasattr(args, "observations") else None observations = validate_observations(observations, o_disturbances_df) observations = [obs - 1 for obs in observations] # Humans count from 1 @@ -498,7 +498,7 @@ def noise_disturbance(mean, std): ref_df_tmp.insert(len(ref_df_tmp.columns), "type", "reference") obs_ref_df = pd.concat([obs_df_tmp, ref_df_tmp], ignore_index=True) - # Loop though all disturbances and plot the observations and references in one plot + # Loop though all disturbances and plot the observations and references in one plot. fig_title = "{} under several {}.".format( "Observation and reference" if all([obs_found, ref_found]) @@ -510,7 +510,7 @@ def noise_disturbance(mean, std): obs_ref_df.loc[obs_ref_df["disturbance_index"] == 0, "disturbance"] = ( obs_ref_df.loc[obs_ref_df["disturbance_index"] == 0, "disturbance"] + " (original)" - ) # Append original to original value + ) # Append original to original value. if not args.merged: num_plots = len(obs_ref_df.disturbance.unique()) total_cols = 3 @@ -546,7 +546,7 @@ def noise_disturbance(mean, std): ).set_title(fig_title) figs["observations"].append(fig) - # Plot mean cost + # Plot mean cost. 
if rew_found: fig = plt.figure(tight_layout=True) figs["costs"].append(fig) @@ -557,7 +557,7 @@ def noise_disturbance(mean, std): r_disturbances_df["disturbance_index"] == 0, "disturbance" ] + " (original)" - ) # Append original to original value + ) # Append original to original value. sns.lineplot( data=r_disturbances_df, x="step", y="reward", ci="sd", hue="disturbance" ).set_title( @@ -576,7 +576,7 @@ def noise_disturbance(mean, std): type="warning", ) - # Plot states of interest + # Plot states of interest. if soi_found: n_soi = soi_disturbances_df["state_of_interest"].max() + 1 soi_disturbances_df.loc[ @@ -586,7 +586,7 @@ def noise_disturbance(mean, std): soi_disturbances_df["disturbance_index"] == 0, "disturbance" ] + " (original)" - ) # Append original to original value + ) # Append original to original value. for index in range(0, n_soi): fig = plt.figure(tight_layout=True) figs["states_of_interest"].append(fig) @@ -614,7 +614,7 @@ def noise_disturbance(mean, std): type="warning", ) - # Save plots + # Save plots. if args.save_figs: figs_path = output_dir.joinpath("figures") figs_extension = ( diff --git a/examples/manual_env_policy_inference.py b/examples/manual_env_policy_inference.py index e7bfab369..9908dbf7b 100644 --- a/examples/manual_env_policy_inference.py +++ b/examples/manual_env_policy_inference.py @@ -22,19 +22,19 @@ # NOTE: STEP: 1b: If step 1 fails recreate the environment and load the Pytorch/ # TF2 agent separately. - # Create the environment + # Create the environment. # NOTE: Here the 'FlattenObservation' wrapper is used to make sure the alg works # with dictionary based observation spaces. env = gym.make("PandaReach-v1") env = gym.wrappers.FlattenObservation(env) - # Load the policy + # Load the policy. if AGENT_TYPE.lower() == "tf2": policy = load_tf_policy(AGENT_FOLDER, itr="last", env=env) # Load TF2 agent else: policy = load_pytorch_policy( AGENT_FOLDER, env=env, itr="last" - ) # Load Pytorch agent + ) # Load Pytorch agent. # NOTE: Step 2: Try to run the policy on the environment. try: diff --git a/examples/pytorch/lac_ray_hyper_parameter_tuning.py b/examples/pytorch/lac_ray_hyper_parameter_tuning.py index b938d8787..66469c6ec 100644 --- a/examples/pytorch/lac_ray_hyper_parameter_tuning.py +++ b/examples/pytorch/lac_ray_hyper_parameter_tuning.py @@ -21,7 +21,7 @@ import gymnasium as gym import numpy as np -# Import the algorithm we want to tune +# Import the algorithm we want to tune. from stable_learning_control.control.algos.pytorch.lac.lac import lac from stable_learning_control.utils.import_utils import lazy_importer @@ -40,10 +40,10 @@ def train_lac(config): config (dict): The Ray tuning configuration dictionary. """ - # Unpack trainable arguments + # Unpack trainable arguments. env_name = config.pop("env_name") - # Run algorithm training + # Run algorithm training. lac( lambda: gym.make(env_name), **config, @@ -51,18 +51,18 @@ def train_lac(config): if __name__ == "__main__": - # Pass system arguments to ray + # Pass system arguments to ray. if len(sys.argv) > 1: ray.init(redis_address=sys.argv[1]) - # Setup the logging dir + # Setup the logging dir. dirname = osp.dirname(__file__) log_path = osp.abspath(osp.join(dirname, "../../data/ray_results")) - # Setup hyperparameter search starting point + # Setup hyperparameter search starting point. current_best_params = [{"gamma": 0.995, "lr_a": 1e-4, "alpha3": 0.2}] - # Setup the parameter space for you hyperparameter search + # Setup the parameter space for you hyperparameter search. 
     # NOTE: This script uses the hyperopt search algorithm for efficient hyperparameter
     # selection. For more information see
     # https://docs.ray.io/en/latest/tune/api_docs/suggestion.html?highlight=hyperopt.
@@ -81,7 +81,7 @@ def train_lac(config):
         points_to_evaluate=current_best_params,
     )
-    # Start the hyperparameter tuning job
+    # Start the hyperparameter tuning job.
     # NOTE: We use the ASHA job scheduler to early terminate bad trials, pause trials,
     # clone trials, and alter hyperparameters of a running trial. For more information
     # see https://docs.ray.io/en/master/tune/api_docs/schedulers.html.
@@ -102,7 +102,7 @@ def train_lac(config):
         local_dir=log_path,
     )
-    # Print the best trail
+    # Print the best trial.
     best_trial = analysis.get_best_trial(metric="mean_ep_ret", mode="min", scope="all")
     best_path = analysis.get_best_logdir(metric="mean_ep_ret", mode="min", scope="all")
     best_config = analysis.get_best_config(
diff --git a/examples/pytorch/sac_exp_grid_search.py b/examples/pytorch/sac_exp_grid_search.py
index f4c88a362..7f6422d94 100644
--- a/examples/pytorch/sac_exp_grid_search.py
+++ b/examples/pytorch/sac_exp_grid_search.py
@@ -19,10 +19,10 @@
 import torch
 from stable_learning_control.control.utils.run_utils import ExperimentGrid
-# Import the RL agent you want to perform the grid search for
+# Import the RL agent you want to perform the grid search for.
 from stable_learning_control.control.algos.pytorch.sac.sac import sac
-# Script parametesr
+# Script parameters.
 ENV_NAME = "Oscillator-v1" # The environment on which you want to train the agent.
 if __name__ == "__main__":
@@ -41,5 +41,5 @@
     eg.add("ac_kwargs:hidden_sizes", [(32,), (64, 64)], "hid")
     eg.add("ac_kwargs:activation", [torch.nn.ReLU, torch.nn.ReLU], "")
-    # Run the grid search
+    # Run the grid search.
     eg.run(sac, num_cpu=args.cpu)
diff --git a/examples/pytorch/sac_ray_hyper_parameter_tuning.py b/examples/pytorch/sac_ray_hyper_parameter_tuning.py
index 6229962d6..14f8eed36 100644
--- a/examples/pytorch/sac_ray_hyper_parameter_tuning.py
+++ b/examples/pytorch/sac_ray_hyper_parameter_tuning.py
@@ -21,7 +21,7 @@
 import gymnasium as gym
 import numpy as np
-# Import the algorithm we want to tune
+# Import the algorithm we want to tune.
 from stable_learning_control.control.algos.pytorch.sac.sac import sac
 from stable_learning_control.utils.import_utils import lazy_importer
@@ -40,10 +40,10 @@ def train_sac(config):
         config (dict): The Ray tuning configuration dictionary.
     """
-    # Unpack trainable arguments
+    # Unpack trainable arguments.
     env_name = config.pop("env_name")
-    # Run algorithm training
+    # Run algorithm training.
     sac(
         lambda: gym.make(env_name),
         **config,
     )
 if __name__ == "__main__":
-    # Pass system arguments to ray
+    # Pass system arguments to ray.
     if len(sys.argv) > 1:
         ray.init(redis_address=sys.argv[1])
-    # Setup the logging dir
+    # Setup the logging dir.
     dirname = osp.dirname(__file__)
     log_path = osp.abspath(osp.join(dirname, "../../data/ray_results"))
-    # Setup hyperparameter search starting point
+    # Setup hyperparameter search starting point.
     current_best_params = [{"gamma": 0.995, "lr_a": 1e-4, "alpha": 0.99}]
-    # Setup the parameter space for you hyperparameter search
+    # Setup the parameter space for your hyperparameter search.
     # NOTE: This script uses the hyperopt search algorithm for efficient hyperparameter
     # selection. For more information see
     # https://docs.ray.io/en/latest/tune/api_docs/suggestion.html?highlight=hyperopt.
@@ -81,7 +81,7 @@ def train_sac(config):
         points_to_evaluate=current_best_params,
     )
-    # Start the hyperparameter tuning job
+    # Start the hyperparameter tuning job.
     # NOTE: We use the ASHA job scheduler to early terminate bad trials, pause trials,
     # clone trials, and alter hyperparameters of a running trial. For more information
     # see https://docs.ray.io/en/master/tune/api_docs/schedulers.html.
@@ -102,7 +102,7 @@ def train_sac(config):
         local_dir=log_path,
     )
-    # Print the best trail
+    # Print the best trial.
     best_trial = analysis.get_best_trial(metric="mean_ep_ret", mode="min", scope="all")
     best_path = analysis.get_best_logdir(metric="mean_ep_ret", mode="min", scope="all")
     best_config = analysis.get_best_config(
diff --git a/examples/tf2/lac_ray_hyper_parameter_tuning.py b/examples/tf2/lac_ray_hyper_parameter_tuning.py
index 244a7b194..65d3a9c21 100644
--- a/examples/tf2/lac_ray_hyper_parameter_tuning.py
+++ b/examples/tf2/lac_ray_hyper_parameter_tuning.py
@@ -21,7 +21,7 @@
 import gymnasium as gym
 import numpy as np
-# Import the algorithm we want to tune
+# Import the algorithm we want to tune.
 from stable_learning_control.control.algos.tf2.lac.lac import lac
 from stable_learning_control.utils.import_utils import lazy_importer
@@ -40,10 +40,10 @@ def train_lac(config):
         config (dict): The Ray tuning configuration dictionary.
     """
-    # Unpack trainable arguments
+    # Unpack trainable arguments.
     env_name = config.pop("env_name")
-    # Run algorithm training
+    # Run algorithm training.
     lac(
         lambda: gym.make(env_name),
         **config,
     )
 if __name__ == "__main__":
-    # Pass system arguments to ray
+    # Pass system arguments to ray.
     if len(sys.argv) > 1:
         ray.init(redis_address=sys.argv[1])
-    # Setup the logging dir
+    # Setup the logging dir.
     dirname = osp.dirname(__file__)
     log_path = osp.abspath(osp.join(dirname, "../../data/ray_results"))
-    # Setup hyperparameter search starting point
+    # Setup hyperparameter search starting point.
     current_best_params = [{"gamma": 0.995, "lr_a": 1e-4, "alpha3": 0.2}]
-    # Setup the parameter space for you hyperparameter search
+    # Setup the parameter space for your hyperparameter search.
     # NOTE: This script uses the hyperopt search algorithm for efficient hyperparameter
     # selection. For more information see
     # https://docs.ray.io/en/latest/tune/api_docs/suggestion.html?highlight=hyperopt.
@@ -81,7 +81,7 @@ def train_lac(config):
         points_to_evaluate=current_best_params,
     )
-    # Start the hyperparameter tuning job
+    # Start the hyperparameter tuning job.
     # NOTE: We use the ASHA job scheduler to early terminate bad trials, pause trials,
     # clone trials, and alter hyperparameters of a running trial. For more information
     # see https://docs.ray.io/en/master/tune/api_docs/schedulers.html.
@@ -102,7 +102,7 @@ def train_lac(config):
         local_dir=log_path,
     )
-    # Print the best trail
+    # Print the best trial.
     best_trial = analysis.get_best_trial(metric="mean_ep_ret", mode="min", scope="all")
     best_path = analysis.get_best_logdir(metric="mean_ep_ret", mode="min", scope="all")
     best_config = analysis.get_best_config(
diff --git a/examples/tf2/sac_exp_grid_search.py b/examples/tf2/sac_exp_grid_search.py
index a790b761f..5e0520d85 100644
--- a/examples/tf2/sac_exp_grid_search.py
+++ b/examples/tf2/sac_exp_grid_search.py
@@ -19,10 +19,10 @@
 import tensorflow as tf
 from stable_learning_control.control.utils.run_utils import ExperimentGrid
-# Import the RL agent you want to perform the grid search for
+# Import the RL agent you want to perform the grid search for.
 from stable_learning_control.control.algos.tf2.sac.sac import sac
-# Script parametesr
+# Script parameters.
 ENV_NAME = "Oscillator-v1" # The environment on which you want to train the agent.
 if __name__ == "__main__":
@@ -41,5 +41,5 @@
     eg.add("ac_kwargs:hidden_sizes", [(32,), (64, 64)], "hid")
     eg.add("ac_kwargs:activation", [tf.nn.relu, tf.nn.relu], "")
-    # Run the grid search
+    # Run the grid search.
     eg.run(sac, num_cpu=args.cpu)
diff --git a/examples/tf2/sac_ray_hyper_parameter_tuning.py b/examples/tf2/sac_ray_hyper_parameter_tuning.py
index 2a2b5a3fc..c7ea83e2c 100644
--- a/examples/tf2/sac_ray_hyper_parameter_tuning.py
+++ b/examples/tf2/sac_ray_hyper_parameter_tuning.py
@@ -21,7 +21,7 @@
 import gymnasium as gym
 import numpy as np
-# Import the algorithm we want to tune
+# Import the algorithm we want to tune.
 from stable_learning_control.control.algos.tf2.sac.sac import sac
 from stable_learning_control.utils.import_utils import lazy_importer
@@ -40,10 +40,10 @@ def train_sac(config):
         config (dict): The Ray tuning configuration dictionary.
     """
-    # Unpack trainable arguments
+    # Unpack trainable arguments.
     env_name = config.pop("env_name")
-    # Run algorithm training
+    # Run algorithm training.
     sac(
         lambda: gym.make(env_name),
         **config,
     )
 if __name__ == "__main__":
-    # Pass system arguments to ray
+    # Pass system arguments to ray.
     if len(sys.argv) > 1:
         ray.init(redis_address=sys.argv[1])
-    # Setup the logging dir
+    # Setup the logging dir.
     dirname = osp.dirname(__file__)
     log_path = osp.abspath(osp.join(dirname, "../../data/ray_results"))
-    # Setup hyperparameter search starting point
+    # Setup hyperparameter search starting point.
     current_best_params = [{"gamma": 0.995, "lr_a": 1e-4, "alpha": 0.99}]
-    # Setup the parameter space for you hyperparameter search
+    # Setup the parameter space for your hyperparameter search.
     # NOTE: This script uses the hyperopt search algorithm for efficient hyperparameter
     # selection. For more information see
     # https://docs.ray.io/en/latest/tune/api_docs/suggestion.html?highlight=hyperopt.
@@ -81,7 +81,7 @@ def train_sac(config):
         points_to_evaluate=current_best_params,
     )
-    # Start the hyperparameter tuning job
+    # Start the hyperparameter tuning job.
     # NOTE: We use the ASHA job scheduler to early terminate bad trials, pause trials,
     # clone trials, and alter hyperparameters of a running trial. For more information
     # see https://docs.ray.io/en/master/tune/api_docs/schedulers.html.
@@ -102,7 +102,7 @@ def train_sac(config):
         local_dir=log_path,
     )
-    # Print the best trail
+    # Print the best trial.
     best_trial = analysis.get_best_trial(metric="mean_ep_ret", mode="min", scope="all")
     best_path = analysis.get_best_logdir(metric="mean_ep_ret", mode="min", scope="all")
     best_config = analysis.get_best_config(
diff --git a/sandbox/test_algorithm_seeding.py b/sandbox/test_algorithm_seeding.py
new file mode 100644
index 000000000..e08083953
--- /dev/null
+++ b/sandbox/test_algorithm_seeding.py
@@ -0,0 +1,44 @@
+"""Test the LAC seeding process."""
+
+from stable_learning_control.control.algos.pytorch.lac.lac import LAC
+from stable_learning_control.control.algos.pytorch.lac.lac import LyapunovActorCritic
+import stable_gym # noqa: F401
+import gymnasium as gym
+from gymnasium.utils import seeding
+
+if __name__ == "__main__":
+    # Create environment.
+    env = gym.make("Oscillator-v1")
+
+    # Seed the environment.
+ generator, seed = seeding.np_random(0) + env.np_random = generator + test = generator.random() + test2 = generator.random() + test3 = generator.random() + test4 = generator.random() + env.action_space.seed(seed) + env.observation_space.seed(seed) + test_act = env.action_space.sample() + test_obs = env.observation_space.sample() + + # Check the environment. + obs, info = env.reset(seed=0) + test_act = env.action_space.sample() + test_obs = env.observation_space.sample() + print(f"Initial observation: {obs}") + print(f"Initial info: {info}") + terminated, truncated = False, False + while not (terminated or truncated): + action = env.action_space.sample() + obs, reward, terminated, truncated, info = env.step(action) + print(f"Action: {action}") + print(f"Observation: {obs}") + print(f"Reward: {reward}") + print(f"Terminated: {terminated}") + print(f"Info: {info}") + truncated = info.get("TimeLimit.truncated", False) + + # Create agent and policy. + agent = LyapunovActorCritic() + policy = LAC() diff --git a/sandbox/test_gym_env.py b/sandbox/test_gym_env.py index 6c3948171..ce44cfbeb 100644 --- a/sandbox/test_gym_env.py +++ b/sandbox/test_gym_env.py @@ -17,7 +17,7 @@ if __name__ == "__main__": env = gym.make(ENV_NAME) - # Take T steps in the environment + # Take T steps in the environment. T = 1000 tau = 0.1 path = [] @@ -39,7 +39,7 @@ t1.append(i * tau) print("Finished Cartpole environment simulation.") - # Plot results + # Plot results. print("Plot results.") fig = plt.figure(figsize=(9, 6)) ax = fig.add_subplot(111) diff --git a/sandbox/test_traj_buffer.py b/sandbox/test_traj_buffer.py index 6e1578d00..f700d34e8 100644 --- a/sandbox/test_traj_buffer.py +++ b/sandbox/test_traj_buffer.py @@ -12,10 +12,10 @@ if __name__ == "__main__": - # Create dummy environment + # Create dummy environment. env = gym.make("CartPoleCost-v0") - # Dummy algorithm settings + # Dummy algorithm settings. obs_dim = env.observation_space.shape[0] act_dim = env.action_space.shape[0] rew_dim = env.reward_range.shape[0] @@ -23,7 +23,7 @@ epochs = 10 local_steps_per_epoch = 100 - # Create Memory Buffer + # Create Memory Buffer. buffer = TrajectoryBuffer( obs_dim=obs_dim, act_dim=act_dim, @@ -33,26 +33,26 @@ incomplete=True, ) - # Create test dummy data + # Create test dummy data. o, ep_ret, ep_len = env.reset(), 0, 0 for epoch in range(epochs): for t in range(local_steps_per_epoch): - # Retrieve data from the environment + # Retrieve data from the environment. a = env.action_space.sample() next_o, r, d, _ = env.step(a) - # Store data in buffer + # Store data in buffer. buffer.store(o, a, r, next_o, d) # Update obs (critical!) o = next_o - # Finish path + # Finish path. if d: buffer.finish_path() o, ep_ret, ep_len = env.reset(), 0, 0 - # Retrieve data from buffer + # Retrieve data from buffer. buffer_data = buffer.get(flat=False) print("test") diff --git a/stable_learning_control/control/algos/__init__.py b/stable_learning_control/control/algos/__init__.py index 83c1bdf6c..85507ce58 100644 --- a/stable_learning_control/control/algos/__init__.py +++ b/stable_learning_control/control/algos/__init__.py @@ -3,7 +3,7 @@ from stable_learning_control.utils.import_utils import import_tf -# Put algorithms on namespace +# Put algorithms on namespace. 
from stable_learning_control.control.algos.pytorch.lac.lac import LAC as LAC_pytorch from stable_learning_control.control.algos.pytorch.sac.sac import SAC as SAC_pytorch diff --git a/stable_learning_control/control/algos/pytorch/common/buffers.py b/stable_learning_control/control/algos/pytorch/common/buffers.py index 952a8774b..30163a87e 100644 --- a/stable_learning_control/control/algos/pytorch/common/buffers.py +++ b/stable_learning_control/control/algos/pytorch/common/buffers.py @@ -44,7 +44,7 @@ def sample_batch(self, *args, **kwargs): super().sample_batch(*args, **kwargs), dtype=torch.float32, device=self.device, - ) # Make sure output is a torch tensor + ) # Make sure output is a torch tensor. class TrajectoryBuffer(TrajectoryBuffer): @@ -84,4 +84,4 @@ def get(self, *args, **kwargs): """ return np_to_torch( super().get(*args, **kwargs), dtype=torch.float32, device=self.device - ) # Make sure output is a torch tensor + ) # Make sure output is a torch tensor. diff --git a/stable_learning_control/control/algos/pytorch/common/get_lr_scheduler.py b/stable_learning_control/control/algos/pytorch/common/get_lr_scheduler.py index 7b86100f2..4320a65cf 100644 --- a/stable_learning_control/control/algos/pytorch/common/get_lr_scheduler.py +++ b/stable_learning_control/control/algos/pytorch/common/get_lr_scheduler.py @@ -102,4 +102,4 @@ def lr_multiplier_function(step): else: return torch.optim.lr_scheduler.LambdaLR( optimizer, lr_lambda=lambda step: np.longdouble(1.0) - ) # Return a constant function + ) # Return a constant function. diff --git a/stable_learning_control/control/algos/pytorch/common/helpers.py b/stable_learning_control/control/algos/pytorch/common/helpers.py index e3763a45c..3418d35ed 100644 --- a/stable_learning_control/control/algos/pytorch/common/helpers.py +++ b/stable_learning_control/control/algos/pytorch/common/helpers.py @@ -53,7 +53,7 @@ def mlp(sizes, activation, output_activation=nn.Identity): Returns: torch.nn.Sequential: The multi-layered perceptron. """ # noqa: E501 - # Try to retrieve the activation function if a string was supplied + # Try to retrieve the activation function if a string was supplied. if isinstance(activation, str): activation = get_activation_function(activation, backend="torch") if isinstance(output_activation, str): diff --git a/stable_learning_control/control/algos/pytorch/lac/lac.py b/stable_learning_control/control/algos/pytorch/lac/lac.py index 3d66874e4..1c707236f 100644 --- a/stable_learning_control/control/algos/pytorch/lac/lac.py +++ b/stable_learning_control/control/algos/pytorch/lac/lac.py @@ -60,15 +60,15 @@ setup_logger_kwargs, ) -# Import ray tuner if installed +# Import ray tuner if installed. tune = lazy_importer(module_name="ray.tune") -# Script settings +# Script settings. SCALE_LAMBDA_MIN_MAX = ( 0.0, 1.0, -) # Range of lambda lagrance multiplier -SCALE_ALPHA_MIN_MAX = (0.0, np.inf) # Range of alpha lagrance multiplier +) # Range of lambda lagrance multiplier. +SCALE_ALPHA_MIN_MAX = (0.0, np.inf) # Range of alpha lagrance multiplier. STD_OUT_LOG_VARS_DEFAULT = [ "Epoch", "TotalEnvInteracts", @@ -210,7 +210,7 @@ def __init__( # noqa: C901 k: v for k, v in locals().items() if k not in ["self", "__class__", "env"] } - # Validate gymnasium env + # Validate gymnasium env. # NOTE: The current implementation only works with continuous spaces. 
if not is_gym_env(env): raise ValueError("Env must be a valid gymnasium environment.") @@ -246,7 +246,7 @@ def __init__( # noqa: C901 type="info", ) - # Store algorithm parameters + # Store algorithm parameters. self._act_dim = env.action_space.shape self._obs_dim = env.observation_space.shape self._device = retrieve_device(device) @@ -265,7 +265,7 @@ def __init__( # noqa: C901 else: self._target_entropy = target_entropy - # Create variables for the Lagrance multipliers + # Create variables for the Lagrance multipliers. # NOTE: Clip at 1e-37 to prevent log_alpha/log_lambda from becoming -np.inf self.log_alpha = nn.Parameter( torch.tensor(np.log(1e-37 if alpha < 1e-37 else alpha), requires_grad=True) @@ -291,7 +291,7 @@ def __init__( # noqa: C901 for p in self.ac_targ.parameters(): p.requires_grad = False - # Create optimizers + # Create optimizers. # NOTE: We here optimize for log_alpha and log_labda instead of alpha and labda # because it is more numerically stable (see: # https://github.com/rail-berkeley/softlearning/issues/136) @@ -367,9 +367,9 @@ def update(self, data): # noqa: C901 o_ ) # NOTE: Target actions come from *current* *target* policy l_pi_targ = self.ac_targ.L(o_, pi_targ_) - l_backup = r + self._gamma * (1 - d) * l_pi_targ # The Lyapunov candidate + l_backup = r + self._gamma * (1 - d) * l_pi_targ # The Lyapunov candidate. - # Get current Lyapunov value + # Get current Lyapunov value. l1 = self.ac.L(o, a) # Calculate Lyapunov *CRITIC* error @@ -407,11 +407,11 @@ def update(self, data): # noqa: C901 "if you need this." ) - # Get target lyapunov value + # Get target lyapunov value. pi_, _ = self.ac.pi(o_) # NOTE: Target actions come from *current* policy lya_l_ = self.ac.L(o_, pi_) - # Compute Lyapunov Actor error + # Compute Lyapunov Actor error. l_delta = torch.mean(lya_l_ - l1.detach() + self._alpha3 * r) # See Han eq. 11 # Calculate entropy-regularized policy loss @@ -437,7 +437,7 @@ def update(self, data): # noqa: C901 if self._adaptive_temperature: self._log_alpha_optimizer.zero_grad() - # Calculate alpha loss + # Calculate alpha loss. alpha_loss = -( self.alpha * (logp_pi.detach() + self.target_entropy) ).mean() # See Haarnoja eq. 17 @@ -454,7 +454,7 @@ def update(self, data): # noqa: C901 ################################################ self._log_labda_optimizer.zero_grad() - # Calculate labda loss + # Calculate labda loss. # NOTE: Log_labda was used in the lambda_loss function because using lambda # caused the gradients to vanish. This is caused since we restrict lambda # within a 0-1.0 range using the clamp function (see #38). Using log_lambda @@ -496,7 +496,7 @@ def save(self, path): except Exception as e: raise Exception("LAC model could not be saved.") from e - # Save additional information + # Save additional information. save_info = { "alg_name": self.__class__.__name__, "setup_kwargs": self._setup_kwargs, @@ -641,7 +641,7 @@ def state_dict(self): state_dict = super().state_dict() state_dict[ "alg_name" - ] = self.__class__.__name__ # Save algorithm name state dict + ] = self.__class__.__name__ # Save algorithm name state dict. return state_dict def bound_lr( @@ -958,7 +958,7 @@ def lac( # noqa: C901 env = env_fn() - # Validate gymnasium env + # Validate gymnasium env. # NOTE: The current implementation only works with continuous spaces. 
if not is_gym_env(env): raise ValueError("Env must be a valid gymnasium environment.") @@ -1004,9 +1004,9 @@ def lac( # noqa: C901 hyper_paramet_dict = { k: v for k, v in locals().items() if k not in ["logger"] } # Retrieve hyperparameters (Ignore logger object) - logger.save_config(hyper_paramet_dict) # Write hyperparameters to logger + logger.save_config(hyper_paramet_dict) # Write hyperparameters to logger. - # Retrieve max episode length + # Retrieve max episode length. if max_ep_len is None: max_ep_len = env.env._max_episode_steps else: @@ -1027,7 +1027,7 @@ def lac( # noqa: C901 # Get default actor critic if no 'actor_critic' was supplied actor_critic = LyapunovActorCritic if actor_critic is None else actor_critic - # Set random seed for reproducible results + # Set random seed for reproducible results. if seed is not None: os.environ["PYTHONHASHSEED"] = str(seed) torch.manual_seed(seed) @@ -1051,7 +1051,7 @@ def lac( # noqa: C901 device, ) - # Restore policy if supplied + # Restore policy if supplied. if start_policy is not None: logger.log(f"Restoring model from '{start_policy}'.", type="info") try: @@ -1074,13 +1074,13 @@ def lac( # noqa: C901 device=policy.device, ) - # Count variables and print network structure + # Count variables and print network structure. var_counts = tuple(count_vars(module) for module in [policy.ac.pi, policy.ac.L]) logger.log("Number of parameters: \t pi: %d, \t L: %d\n" % var_counts, type="info") logger.log("Network structure:\n", type="info") logger.log(policy.ac, end="\n\n") - # Create learning rate schedulers + # Create learning rate schedulers. opt_schedulers = [] lr_decay_ref_var = total_steps if lr_decay_ref.lower() == "steps" else epochs pi_opt_scheduler = get_lr_scheduler( @@ -1106,7 +1106,7 @@ def lac( # noqa: C901 logger.setup_pytorch_saver(policy) - # Setup diagnostics tb_write dict and store initial learning rates + # Setup diagnostics tb_write dict and store initial learning rates. diag_tb_log_list = [ "ErrorL", "LossPi", @@ -1155,7 +1155,7 @@ def lac( # noqa: C901 else: a = env.action_space.sample() - # Take step in the env + # Take step in the env. o_, r, d, truncated, _ = env.step(a) ep_ret += r ep_len += 1 @@ -1165,28 +1165,28 @@ def lac( # noqa: C901 # Make sure to update most recent observation! o = o_ - # End of trajectory handling + # End of trajectory handling. if d or truncated: logger.store(EpRet=ep_ret, EpLen=ep_len) o, _ = env.reset() ep_ret, ep_len = 0, 0 - # Update handling + # Update handling. if (t + 1) >= update_after and ((t + 1) - update_after) % update_every == 0: - # Step based learning rate decay + # Step based learning rate decay. if lr_decay_ref.lower() == "step": for scheduler in opt_schedulers: scheduler.step() policy.bound_lr( lr_a_final, lr_c_final, lr_a_final, lr_a_final - ) # Make sure lr is bounded above the final lr + ) # Make sure lr is bounded above the final lr. for _ in range(steps_per_update): batch = replay_buffer.sample_batch(batch_size) update_diagnostics = policy.update(data=batch) - logger.store(**update_diagnostics) # Log diagnostics + logger.store(**update_diagnostics) # Log diagnostics. - # SGD batch tb logging + # SGD batch tb logging. if use_tensorboard and not tb_low_log_freq: logger.log_to_tb(keys=diag_tb_log_list, global_step=t) @@ -1194,11 +1194,11 @@ def lac( # noqa: C901 if (t + 1) % steps_per_epoch == 0: epoch = (t + 1) // steps_per_epoch - # Save model + # Save model. 
if (epoch % save_freq == 0) or (epoch == epochs): logger.save_state({"env": env}, itr=epoch) - # Test the performance of the deterministic version of the agent + # Test the performance of the deterministic version of the agent. if num_test_episodes != 0: eps_ret, eps_len = test_agent( policy, test_env, num_test_episodes, max_ep_len=max_ep_len @@ -1209,15 +1209,15 @@ def lac( # noqa: C901 extend=True, ) - # Epoch based learning rate decay + # Epoch based learning rate decay. if lr_decay_ref.lower() != "step": for scheduler in opt_schedulers: scheduler.step() policy.bound_lr( lr_a_final, lr_c_final, lr_a_final, lr_a_final - ) # Make sure lr is bounded above the final lr + ) # Make sure lr is bounded above the final lr. - # Log performance measure to ray tuning + # Log performance measure to ray tuning. # NOTE: Only executed when the ray tuner invokes the script if hasattr(tune, "session") and tune.session._session is not None: mean_ep_ret = logger.get_stats("EpRet") @@ -1226,7 +1226,7 @@ def lac( # noqa: C901 mean_ep_ret=mean_ep_ret[0], epoch=epoch, mean_ep_len=mean_ep_len[0] ) - # Log info about epoch + # Log info about epoch. logger.log_tabular("Epoch", epoch) logger.log_tabular("TotalEnvInteracts", t) logger.log_tabular( @@ -1546,7 +1546,7 @@ def lac( # noqa: C901 ), ) - # Parse logger related arguments + # Parse logger related arguments. parser.add_argument( "--exp_name", type=str, @@ -1605,7 +1605,7 @@ def lac( # noqa: C901 ) args = parser.parse_args() - # Setup actor critic arguments + # Setup actor critic arguments. output_activation = {} output_activation["actor"] = safer_eval(args.act_out_a, backend="torch") ac_kwargs = dict( @@ -1620,7 +1620,7 @@ def lac( # noqa: C901 output_activation=output_activation, ) - # Setup output dir for logger and return output kwargs + # Setup output dir for logger and return output kwargs. logger_kwargs = setup_logger_kwargs( args.exp_name, args.seed, diff --git a/stable_learning_control/control/algos/pytorch/policies/actors/squashed_gaussian_actor.py b/stable_learning_control/control/algos/pytorch/policies/actors/squashed_gaussian_actor.py index 234de7e24..a5d8b4cf7 100644 --- a/stable_learning_control/control/algos/pytorch/policies/actors/squashed_gaussian_actor.py +++ b/stable_learning_control/control/algos/pytorch/policies/actors/squashed_gaussian_actor.py @@ -89,7 +89,7 @@ def forward(self, obs, deterministic=False, with_logprob=True): - pi_action (:obj:`torch.Tensor`): The actions given by the policy. - logp_pi (:obj:`torch.Tensor`): The log probabilities of each of these actions. """ # noqa: E501 - # Make sure the observations are on the right device + # Make sure the observations are on the right device. if obs.device != self.net[0].weight.device: if not self.__device_warning_logged: device_warn_msg = ( @@ -109,7 +109,7 @@ def forward(self, obs, deterministic=False, with_logprob=True): self.__device_warning_logged = True obs = obs.to(self.net[0].weight.device) - # Calculate mean action and standard deviation + # Calculate mean action and standard deviation. net_out = self.net(obs) mu = self.mu_layer(net_out) log_std = self.log_std_layer(net_out) @@ -124,7 +124,7 @@ def forward(self, obs, deterministic=False, with_logprob=True): else: pi_action = ( pi_distribution.rsample() - ) # Sample while using the parameterization trick + ) # Sample while using the parameterization trick. # Compute logprob from Gaussian, and then apply correction for Tanh squashing. 
if with_logprob: @@ -141,10 +141,10 @@ def forward(self, obs, deterministic=False, with_logprob=True): else: logp_pi = None - # Calculate scaled action and return the action and its log probability + # Calculate scaled action and return the action and its log probability. pi_action = torch.tanh(pi_action) # Squash gaussian to be between -1 and 1 - # Clamp the actions such that they are in range of the environment + # Clamp the actions such that they are in range of the environment. if self.act_limits is not None: pi_action = clamp( pi_action, diff --git a/stable_learning_control/control/algos/pytorch/policies/critics/L_critic.py b/stable_learning_control/control/algos/pytorch/policies/critics/L_critic.py index b6cf0357c..364798c55 100644 --- a/stable_learning_control/control/algos/pytorch/policies/critics/L_critic.py +++ b/stable_learning_control/control/algos/pytorch/policies/critics/L_critic.py @@ -52,7 +52,7 @@ def forward(self, obs, act): The tensor containing the lyapunov values of the input observations and actions. """ - # Make sure the observations and actions are on the right device + # Make sure the observations and actions are on the right device. self._obs_same_device = obs.device != self.L[0].weight.device self._act_same_device = act.device != self.L[0].weight.device if self._obs_same_device or self._act_same_device: diff --git a/stable_learning_control/control/algos/pytorch/policies/critics/Q_critic.py b/stable_learning_control/control/algos/pytorch/policies/critics/Q_critic.py index 198ac638c..56c0c66f6 100644 --- a/stable_learning_control/control/algos/pytorch/policies/critics/Q_critic.py +++ b/stable_learning_control/control/algos/pytorch/policies/critics/Q_critic.py @@ -60,7 +60,7 @@ def forward(self, obs, act): The tensor containing the Q values of the input observations and actions. """ - # Make sure the observations and actions are on the right device + # Make sure the observations and actions are on the right device. self._obs_same_device = obs.device != self.Q[0].weight.device self._act_same_device = act.device != self.Q[0].weight.device if self._obs_same_device or self._act_same_device: diff --git a/stable_learning_control/control/algos/pytorch/sac/sac.py b/stable_learning_control/control/algos/pytorch/sac/sac.py index 916701420..f283c0777 100644 --- a/stable_learning_control/control/algos/pytorch/sac/sac.py +++ b/stable_learning_control/control/algos/pytorch/sac/sac.py @@ -61,15 +61,15 @@ setup_logger_kwargs, ) -# Import ray tuner if installed +# Import ray tuner if installed. tune = lazy_importer(module_name="ray.tune") -# Script settings +# Script settings. SCALE_LAMBDA_MIN_MAX = ( 0.0, 1.0, -) # Range of lambda lagrance multiplier -SCALE_ALPHA_MIN_MAX = (0.0, np.inf) # Range of alpha lagrance multiplier +) # Range of lambda lagrance multiplier. +SCALE_ALPHA_MIN_MAX = (0.0, np.inf) # Range of alpha lagrance multiplier. STD_OUT_LOG_VARS_DEFAULT = [ "Epoch", "TotalEnvInteracts", @@ -203,7 +203,7 @@ def __init__( # noqa: C901 k: v for k, v in locals().items() if k not in ["self", "__class__", "env"] } - # Validate gymnasium env + # Validate gymnasium env. # NOTE: The current implementation only works with continuous spaces. if not is_gym_env(env): raise ValueError("Env must be a valid gymnasium environment.") @@ -239,7 +239,7 @@ def __init__( # noqa: C901 type="info", ) - # Store algorithm parameters + # Store algorithm parameters. 
self._act_dim = env.action_space.shape self._obs_dim = env.observation_space.shape self._device = retrieve_device(device) @@ -256,7 +256,7 @@ def __init__( # noqa: C901 else: self._target_entropy = target_entropy - # Create variables for the Lagrance multipliers + # Create variables for the Lagrance multipliers. # NOTE: Clip at 1e-37 to prevent log_alpha/log_lambda from becoming -np.inf self.log_alpha = nn.Parameter( torch.tensor(np.log(1e-37 if alpha < 1e-37 else alpha), requires_grad=True) @@ -279,7 +279,7 @@ def __init__( # noqa: C901 for p in self.ac_targ.parameters(): p.requires_grad = False - # Create optimizers + # Create optimizers. # NOTE: We here optimize for log_alpha instead of alpha because it is more # numerically stable (see: # https://github.com/rail-berkeley/softlearning/issues/136) @@ -350,7 +350,7 @@ def update(self, data): # noqa: C901 o_ ) # NOTE: Target actions coming from *current* policy - # Get target Q values based on optimization type + # Get target Q values based on optimization type. q1_pi_targ = self.ac_targ.Q1(o_, pi_) q2_pi_targ = self.ac_targ.Q2(o_, pi_) if self._opt_type.lower() == "minimize": @@ -361,10 +361,10 @@ def update(self, data): # noqa: C901 else: q_pi_targ = torch.min( q1_pi_targ, q2_pi_targ - ) # Use min clipping to prevent overestimation bias + ) # Use min clipping to prevent overestimation bias. q_backup = r + self._gamma * (1 - d) * (q_pi_targ - self.alpha * logp_pi_) - # Retrieve the current Q values + # Retrieve the current Q values. q1 = self.ac.Q1(o, a) q2 = self.ac.Q2(o, a) @@ -393,7 +393,7 @@ def update(self, data): # noqa: C901 # Retrieve log probabilities of batch observations based on *current* policy pi, logp_pi = self.ac.pi(o) - # Retrieve current Q values + # Retrieve current Q values. # NOTE: Actions come from *current* policy q1_pi = self.ac.Q1(o, pi) q2_pi = self.ac.Q2(o, pi) @@ -428,7 +428,7 @@ def update(self, data): # noqa: C901 if self._adaptive_temperature: self._log_alpha_optimizer.zero_grad() - # Calculate alpha loss + # Calculate alpha loss. alpha_loss = -( self.alpha * (logp_pi.detach() + self.target_entropy) ).mean() # See Haarnoja eq. 17 @@ -467,7 +467,7 @@ def save(self, path): except Exception as e: raise Exception("SAC model could not be saved.") from e - # Save additional information + # Save additional information. save_info = { "alg_name": self.__class__.__name__, "setup_kwargs": self._setup_kwargs, @@ -612,7 +612,7 @@ def state_dict(self): state_dict = super().state_dict() state_dict[ "alg_name" - ] = self.__class__.__name__ # Save algorithm name state dict + ] = self.__class__.__name__ # Save algorithm name state dict. return state_dict def bound_lr(self, lr_a_final=None, lr_c_final=None, lr_alpha_final=None): @@ -895,7 +895,7 @@ def sac( # noqa: C901 env = env_fn() - # Validate gymnasium env + # Validate gymnasium env. # NOTE: The current implementation only works with continuous spaces. if not is_gym_env(env): raise ValueError("Env must be a valid gymnasium environment.") @@ -941,9 +941,9 @@ def sac( # noqa: C901 hyper_paramet_dict = { k: v for k, v in locals().items() if k not in ["logger"] } # Retrieve hyperparameters (Ignore logger object) - logger.save_config(hyper_paramet_dict) # Write hyperparameters to logger + logger.save_config(hyper_paramet_dict) # Write hyperparameters to logger. - # Retrieve max episode length + # Retrieve max episode length. 
if max_ep_len is None: max_ep_len = env.env._max_episode_steps else: @@ -964,7 +964,7 @@ def sac( # noqa: C901 # Get default actor critic if no 'actor_critic' was supplied actor_critic = SoftActorCritic if actor_critic is None else actor_critic - # Set random seed for reproducible results + # Set random seed for reproducible results. if seed is not None: os.environ["PYTHONHASHSEED"] = str(seed) torch.manual_seed(seed) @@ -986,7 +986,7 @@ def sac( # noqa: C901 device, ) - # Restore policy if supplied + # Restore policy if supplied. if start_policy is not None: logger.log(f"Restoring model from '{start_policy}'.", type="info") try: @@ -1009,7 +1009,7 @@ def sac( # noqa: C901 device=policy.device, ) - # Count variables and print network structure + # Count variables and print network structure. var_counts = tuple( count_vars(module) for module in [policy.ac.pi, policy.ac.Q1, policy.ac.Q2] ) @@ -1020,7 +1020,7 @@ def sac( # noqa: C901 logger.log("Network structure:\n", type="info") logger.log(policy.ac, end="\n\n") - # Create learning rate schedulers + # Create learning rate schedulers. opt_schedulers = [] lr_decay_ref_var = total_steps if lr_decay_ref.lower() == "steps" else epochs pi_opt_scheduler = get_lr_scheduler( @@ -1038,7 +1038,7 @@ def sac( # noqa: C901 logger.setup_pytorch_saver(policy) - # Setup diagnostics tb_write dict and store initial learning rates + # Setup diagnostics tb_write dict and store initial learning rates. diag_tb_log_list = ["LossQ", "LossPi", "Alpha", "LossAlpha", "Entropy"] if use_tensorboard: logger.log_to_tb( @@ -1073,7 +1073,7 @@ def sac( # noqa: C901 else: a = env.action_space.sample() - # Take step in the env + # Take step in the env. o_, r, d, truncated, _ = env.step(a) ep_ret += r ep_len += 1 @@ -1083,28 +1083,28 @@ def sac( # noqa: C901 # Make sure to update most recent observation! o = o_ - # End of trajectory handling + # End of trajectory handling. if d or truncated: logger.store(EpRet=ep_ret, EpLen=ep_len) o, _ = env.reset() ep_ret, ep_len = 0, 0 - # Update handling + # Update handling. if (t + 1) >= update_after and ((t + 1) - update_after) % update_every == 0: - # Step based learning rate decay + # Step based learning rate decay. if lr_decay_ref.lower() == "step": for scheduler in opt_schedulers: scheduler.step() policy.bound_lr( lr_a_final, lr_c_final, lr_a_final - ) # Make sure lr is bounded above the final lr + ) # Make sure lr is bounded above the final lr. for _ in range(steps_per_update): batch = replay_buffer.sample_batch(batch_size) update_diagnostics = policy.update(data=batch) - logger.store(**update_diagnostics) # Log diagnostics + logger.store(**update_diagnostics) # Log diagnostics. - # SGD batch tb logging + # SGD batch tb logging. if use_tensorboard and not tb_low_log_freq: logger.log_to_tb(keys=diag_tb_log_list, global_step=t) @@ -1112,11 +1112,11 @@ def sac( # noqa: C901 if (t + 1) % steps_per_epoch == 0: epoch = (t + 1) // steps_per_epoch - # Save model + # Save model. if (epoch % save_freq == 0) or (epoch == epochs): logger.save_state({"env": env}, itr=epoch) - # Test the performance of the deterministic version of the agent + # Test the performance of the deterministic version of the agent. if num_test_episodes != 0: eps_ret, eps_len = test_agent( policy, test_env, num_test_episodes, max_ep_len=max_ep_len @@ -1127,15 +1127,15 @@ def sac( # noqa: C901 extend=True, ) - # Epoch based learning rate decay + # Epoch based learning rate decay. 
if lr_decay_ref.lower() != "step": for scheduler in opt_schedulers: scheduler.step() policy.bound_lr( lr_a_final, lr_c_final, lr_a_final - ) # Make sure lr is bounded above the final lr + ) # Make sure lr is bounded above the final lr. - # Log performance measure to ray tuning + # Log performance measure to ray tuning. # NOTE: Only executed when the ray tuner invokes the script if hasattr(tune, "session") and tune.session._session is not None: mean_ep_ret = logger.get_stats("EpRet") @@ -1144,7 +1144,7 @@ def sac( # noqa: C901 mean_ep_ret=mean_ep_ret[0], epoch=epoch, mean_ep_len=mean_ep_len[0] ) - # Log info about epoch + # Log info about epoch. logger.log_tabular("Epoch", epoch) logger.log_tabular("TotalEnvInteracts", t) logger.log_tabular( @@ -1443,7 +1443,7 @@ def sac( # noqa: C901 ), ) - # Parse logger related arguments + # Parse logger related arguments. parser.add_argument( "--exp_name", type=str, @@ -1502,7 +1502,7 @@ def sac( # noqa: C901 ) args = parser.parse_args() - # Setup actor critic arguments + # Setup actor critic arguments. output_activation = {} output_activation["actor"] = safer_eval(args.act_out_a, backend="torch") output_activation["critic"] = safer_eval(args.act_out_c, backend="torch") @@ -1518,7 +1518,7 @@ def sac( # noqa: C901 output_activation=output_activation, ) - # Setup output dir for logger and return output kwargs + # Setup output dir for logger and return output kwargs. logger_kwargs = setup_logger_kwargs( args.exp_name, args.seed, diff --git a/stable_learning_control/control/algos/tf2/__init__.py b/stable_learning_control/control/algos/tf2/__init__.py index bad9aabe1..ab985d609 100644 --- a/stable_learning_control/control/algos/tf2/__init__.py +++ b/stable_learning_control/control/algos/tf2/__init__.py @@ -1,6 +1,6 @@ """Contains the Tensorflow 2.x implementations of the RL/IL algorithms. """ -# Put algorithms on namespace for easy loading in the test_policy utility +# Put algorithms on namespace for easy loading in the test_policy utility. from stable_learning_control.control.algos.tf2.lac.lac import LAC from stable_learning_control.control.algos.tf2.sac.sac import SAC diff --git a/stable_learning_control/control/algos/tf2/common/get_lr_scheduler.py b/stable_learning_control/control/algos/tf2/common/get_lr_scheduler.py index 476906bd3..d6945c764 100644 --- a/stable_learning_control/control/algos/tf2/common/get_lr_scheduler.py +++ b/stable_learning_control/control/algos/tf2/common/get_lr_scheduler.py @@ -48,4 +48,4 @@ def get_lr_scheduler(decaying_lr_type, lr_start, lr_final, steps): return lr_scheduler else: - return lambda step: lr_start # Return a constant learning rate + return lambda step: lr_start # Return a constant learning rate. diff --git a/stable_learning_control/control/algos/tf2/common/helpers.py b/stable_learning_control/control/algos/tf2/common/helpers.py index 00c029c0b..6162b19ed 100644 --- a/stable_learning_control/control/algos/tf2/common/helpers.py +++ b/stable_learning_control/control/algos/tf2/common/helpers.py @@ -20,7 +20,7 @@ def set_device(device_type="cpu"): str: The type of device that is used. """ if device_type.lower() == "cpu": - tf.config.set_visible_devices([], "GPU") # Force disable GPU + tf.config.set_visible_devices([], "GPU") # Force disable GPU. 
log_to_std_out(f"Tensorflow is using the {device_type.upper()}.", type="info") return device_type.lower() diff --git a/stable_learning_control/control/algos/tf2/lac/lac.py b/stable_learning_control/control/algos/tf2/lac/lac.py index fe84dde69..21634243c 100644 --- a/stable_learning_control/control/algos/tf2/lac/lac.py +++ b/stable_learning_control/control/algos/tf2/lac/lac.py @@ -58,15 +58,15 @@ nn = import_tf(module_name="tensorflow.nn") Adam = import_tf(module_name="tensorflow.keras.optimizers", class_name="Adam") -# Import ray tuner if installed +# Import ray tuner if installed. tune = lazy_importer(module_name="ray.tune") -# Script settings +# Script settings. SCALE_LAMBDA_MIN_MAX = ( 0.0, 1.0, -) # Range of lambda lagrance multiplier -SCALE_ALPHA_MIN_MAX = (0.0, np.inf) # Range of alpha lagrance multiplier +) # Range of lambda lagrance multiplier. +SCALE_ALPHA_MIN_MAX = (0.0, np.inf) # Range of alpha lagrance multiplier. STD_OUT_LOG_VARS_DEFAULT = [ "Epoch", "TotalEnvInteracts", @@ -212,7 +212,7 @@ def __init__( # noqa: C901 } self._was_build = False - # Validate gymnasium env + # Validate gymnasium env. # NOTE: The current implementation only works with continuous spaces. if not is_gym_env(env): raise ValueError("Env must be a valid gymnasium environment.") @@ -248,7 +248,7 @@ def __init__( # noqa: C901 type="info", ) - # Store algorithm parameters + # Store algorithm parameters. self._act_dim = env.action_space.shape self._obs_dim = env.observation_space.shape self._device = set_device(device) @@ -267,7 +267,7 @@ def __init__( # noqa: C901 else: self._target_entropy = target_entropy - # Create variables for the Lagrance multipliers + # Create variables for the Lagrance multipliers. # NOTE: Clip at 1e-37 to prevent log_alpha/log_lambda from becoming -np.inf self.log_alpha = tf.Variable( tf.math.log(1e-37 if alpha < 1e-37 else alpha), name="log_alpha" @@ -290,7 +290,7 @@ def __init__( # noqa: C901 self._init_targets() - # Create optimizers + # Create optimizers. # NOTE: We here optimize for log_alpha and log_labda instead of alpha and labda # because it is more numerically stable (see: # https://github.com/rail-berkeley/softlearning/issues/136) @@ -370,11 +370,11 @@ def update(self, data): o_ ) # NOTE: Target actions come from *current* *target* policy l_pi_targ = self.ac_targ.L([o_, pi_targ_]) - l_backup = r + self._gamma * (1 - d) * l_pi_targ # The Lyapunov candidate + l_backup = r + self._gamma * (1 - d) * l_pi_targ # The Lyapunov candidate. - # Compute Lyapunov Critic error gradients + # Compute Lyapunov Critic error gradients. with tf.GradientTape() as l_tape: - # Get current Lyapunov value + # Get current Lyapunov value. l1 = self.ac.L([o, a]) # Calculate Lyapunov *CRITIC* error @@ -391,7 +391,7 @@ def update(self, data): ################################################ # Optimize Gaussian actor ###################### ################################################ - # Compute actor loss gradients + # Compute actor loss gradients. with tf.GradientTape() as a_tape: # Retrieve log probabilities of batch observations based on *current* policy _, logp_pi = self.ac.pi(o) @@ -404,11 +404,11 @@ def update(self, data): "if you need this." ) - # Get target lyapunov value + # Get target lyapunov value. pi_, _ = self.ac.pi(o_) # NOTE: Target actions come from *current* policy lya_l_ = self.ac.L([o_, pi_]) - # Compute Lyapunov Actor error + # Compute Lyapunov Actor error. l_delta = tf.reduce_mean( lya_l_ - tf.stop_gradient(l1) + self._alpha3 * r ) # See Han eq. 
11 @@ -429,9 +429,9 @@ def update(self, data): # Optimize alpha (Entropy temperature) ######### ################################################ if self._adaptive_temperature: - # Compute alpha loss gradients + # Compute alpha loss gradients. with tf.GradientTape() as alpha_tape: - # Calculate alpha loss + # Calculate alpha loss. alpha_loss = -tf.reduce_mean( self.alpha * tf.stop_gradient(logp_pi + self.target_entropy) ) # See Haarnoja eq. 17 @@ -447,9 +447,9 @@ def update(self, data): # Optimize labda (Lyapunov temperature) ######## ################################################ - # Compute labda loss gradients + # Compute labda loss gradients. with tf.GradientTape() as lambda_tape: - # Calculate labda loss + # Calculate labda loss. # NOTE: Log_labda was used in the lambda_loss function because using # lambda caused the gradients to vanish. This is caused since we # restrict lambda within a 0-1.0 range using the clamp function @@ -496,7 +496,7 @@ def save(self, path, checkpoint_name="checkpoint"): except Exception as e: raise Exception("LAC model could not be saved.") from e - # Save additional information + # Save additional information. save_info = { "alg_name": self.__class__.__name__, "setup_kwargs": self._setup_kwargs, @@ -528,7 +528,7 @@ def restore(self, path, restore_lagrance_multipliers=False): "path and try again." ) - # Store initial values in order to ignore them when loading the weights + # Store initial values in order to ignore them when loading the weights. lr_a = self._lr_a.value() lr_alpha = self._lr_alpha.value() lr_lag = self._lr_lag.value() @@ -576,7 +576,7 @@ def export(self, path): obs_dummy = tf.random.uniform( combine_shapes(1, self._obs_dim), dtype=tf.float32 ) - self.ac.pi.get_action(obs_dummy) # Make sure the full graph was traced + self.ac.pi.get_action(obs_dummy) # Make sure the full graph was traced. self.ac.pi.save(osp.join(path, "tf2_save")) def build(self): @@ -912,7 +912,7 @@ def lac( # noqa: C901 env = env_fn() - # Validate gymnasium env + # Validate gymnasium env. # NOTE: The current implementation only works with continuous spaces. if not is_gym_env(env): raise ValueError("Env must be a valid gymnasium environment.") @@ -959,9 +959,9 @@ def lac( # noqa: C901 hyper_paramet_dict = { k: v for k, v in locals().items() if k not in ["logger"] } # Retrieve hyperparameters (Ignore logger object) - logger.save_config(hyper_paramet_dict) # Write hyperparameters to logger + logger.save_config(hyper_paramet_dict) # Write hyperparameters to logger. - # Retrieve max episode length + # Retrieve max episode length. if max_ep_len is None: max_ep_len = env.env._max_episode_steps else: @@ -982,7 +982,7 @@ def lac( # noqa: C901 # Get default actor critic if no 'actor_critic' was supplied actor_critic = LyapunovActorCritic if actor_critic is None else actor_critic - # Set random seed for reproducible results + # Set random seed for reproducible results. if seed is not None: os.environ["PYTHONHASHSEED"] = str(seed) os.environ["TF_CUDNN_DETERMINISTIC"] = "1" # new flag present in tf 2.0+ @@ -1007,13 +1007,13 @@ def lac( # noqa: C901 device, ) - # Create learning rate schedulers + # Create learning rate schedulers. # NOTE: Alpha and labda currently use the same scheduler as the actor. 
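# NOTE: Illustrative sketch, not part of this diff: a hedged sketch of what a
# get_lr_scheduler-style factory returns, i.e. a plain callable mapping a step (or
# epoch) to a learning rate, with a constant schedule as the fallback. The names
# and the linear rule are made up; the package's helper may differ in detail.
def make_lr_scheduler(decay_type, lr_start, lr_final, steps):
    if decay_type == "linear":
        return lambda step: lr_start + min(step / steps, 1.0) * (lr_final - lr_start)
    return lambda step: lr_start  # Constant learning rate.

lr_a_scheduler = make_lr_scheduler("linear", 1e-4, 1e-5, 100)
print(lr_a_scheduler(50))  # 5.5e-05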
lr_decay_ref_var = total_steps if lr_decay_ref.lower() == "steps" else epochs lr_a_scheduler = get_lr_scheduler(lr_decay_type, lr_a, lr_a_final, lr_decay_ref_var) lr_c_scheduler = get_lr_scheduler(lr_decay_type, lr_c, lr_c_final, lr_decay_ref_var) - # Restore policy if supplied + # Restore policy if supplied. if start_policy is not None: logger.log(f"Restoring model from '{start_policy}'.", type="info") try: @@ -1035,7 +1035,7 @@ def lac( # noqa: C901 size=replay_size, ) - # Count variables and print network structure + # Count variables and print network structure. var_counts = tuple(count_vars(module) for module in [policy.ac.pi, policy.ac.L]) logger.log("Number of parameters: \t pi: %d, \t L: %d\n" % var_counts, type="info") logger.log("Network structure:\n", type="info") @@ -1043,7 +1043,7 @@ def lac( # noqa: C901 logger.setup_tf_saver(policy) - # Setup diagnostics tb_write dict and store initial learning rates + # Setup diagnostics tb_write dict and store initial learning rates. diag_tb_log_list = [ "ErrorL", "LossPi", @@ -1092,7 +1092,7 @@ def lac( # noqa: C901 else: a = env.action_space.sample() - # Take step in the env + # Take step in the env. o_, r, d, truncated, _ = env.step(a) ep_ret += r ep_len += 1 @@ -1102,22 +1102,22 @@ def lac( # noqa: C901 # Make sure to update most recent observation! o = o_ - # End of trajectory handling + # End of trajectory handling. if d or truncated: logger.store(EpRet=ep_ret, EpLen=ep_len) o, _ = env.reset() ep_ret, ep_len = 0, 0 - # Update handling + # Update handling. if (t + 1) >= update_after and ((t + 1) - update_after) % update_every == 0: - # Step based learning rate decay + # Step based learning rate decay. if lr_decay_ref.lower() == "step": lr_a_now = max( lr_a_scheduler(t + 1), lr_a_final - ) # Make sure lr is bounded above final lr + ) # Make sure lr is bounded above final lr. lr_c_now = max( lr_c_scheduler(t + 1), lr_c_final - ) # Make sure lr is bounded above final lr + ) # Make sure lr is bounded above final lr. policy.set_learning_rates( lr_a=lr_a_now, lr_c=lr_c_now, lr_alpha=lr_a_now, lr_labda=lr_a_now ) @@ -1125,8 +1125,8 @@ def lac( # noqa: C901 for _ in range(steps_per_update): batch = replay_buffer.sample_batch(batch_size) update_diagnostics = policy.update(data=batch) - logger.store(**update_diagnostics) # Log diagnostics - # SGD batch tb logging + logger.store(**update_diagnostics) # Log diagnostics. + # SGD batch tb logging. if use_tensorboard and not tb_low_log_freq: logger.log_to_tb(keys=diag_tb_log_list, global_step=t) @@ -1134,11 +1134,11 @@ def lac( # noqa: C901 if (t + 1) % steps_per_epoch == 0: epoch = (t + 1) // steps_per_epoch - # Save model + # Save model. if (epoch % save_freq == 0) or (epoch == epochs): logger.save_state({"env": env}, itr=epoch) - # Test the performance of the deterministic version of the agent + # Test the performance of the deterministic version of the agent. if num_test_episodes != 0: eps_ret, eps_len = test_agent( policy, test_env, num_test_episodes, max_ep_len=max_ep_len @@ -1149,19 +1149,19 @@ def lac( # noqa: C901 extend=True, ) - # Epoch based learning rate decay + # Epoch based learning rate decay. if lr_decay_ref.lower() != "step": lr_a_now = max( lr_a_scheduler(epoch), lr_a_final - ) # Make sure lr is bounded above final + ) # Make sure lr is bounded above final. lr_c_now = max( lr_c_scheduler(epoch), lr_c_final - ) # Make sure lr is bounded above final + ) # Make sure lr is bounded above final. 
policy.set_learning_rates( lr_a=lr_a_now, lr_c=lr_c_now, lr_alpha=lr_a_now, lr_labda=lr_a_now ) - # Log performance measure to ray tuning + # Log performance measure to ray tuning. # NOTE: Only executed when the ray tuner invokes the script if hasattr(tune, "session") and tune.session._session is not None: mean_ep_ret = logger.get_stats("EpRet") @@ -1170,7 +1170,7 @@ def lac( # noqa: C901 mean_ep_ret=mean_ep_ret[0], epoch=epoch, mean_ep_len=mean_ep_len[0] ) - # Log info about epoch + # Log info about epoch. logger.log_tabular("Epoch", epoch) logger.log_tabular("TotalEnvInteracts", t) logger.log_tabular( @@ -1490,7 +1490,7 @@ def lac( # noqa: C901 ), ) - # Parse logger related arguments + # Parse logger related arguments. parser.add_argument( "--exp_name", type=str, @@ -1549,7 +1549,7 @@ def lac( # noqa: C901 ) args = parser.parse_args() - # Setup actor critic arguments + # Setup actor critic arguments. output_activation = {} output_activation["actor"] = safer_eval(args.act_out_a, backend="tf") ac_kwargs = dict( @@ -1564,7 +1564,7 @@ def lac( # noqa: C901 output_activation=output_activation, ) - # Setup output dir for logger and return output kwargs + # Setup output dir for logger and return output kwargs. logger_kwargs = setup_logger_kwargs( args.exp_name, args.seed, diff --git a/stable_learning_control/control/algos/tf2/policies/actors/squashed_gaussian_actor.py b/stable_learning_control/control/algos/tf2/policies/actors/squashed_gaussian_actor.py index 2843b7ada..87bc2bcb6 100644 --- a/stable_learning_control/control/algos/tf2/policies/actors/squashed_gaussian_actor.py +++ b/stable_learning_control/control/algos/tf2/policies/actors/squashed_gaussian_actor.py @@ -113,7 +113,7 @@ def call(self, obs, deterministic=False, with_logprob=True): - pi_action (:obj:`tensorflow.Tensor`): The actions given by the policy. - logp_pi (:obj:`tensorflow.Tensor`): The log probabilities of each of these actions. """ # noqa: E501 - # Calculate mean action and standard deviation + # Calculate mean action and standard deviation. net_out = self.net(obs) mu = self.mu_layer(net_out) log_std = self.log_std_layer(net_out) @@ -127,19 +127,19 @@ def call(self, obs, deterministic=False, with_logprob=True): if deterministic: pi_action = mu # determinestic action used at test time. else: - # Sample from the normal distribution and calculate the action + # Sample from the normal distribution and calculate the action. batch_size = tf.shape(input=obs)[0] epsilon = self._normal_distribution.sample(batch_size) pi_action = affine_bijector.forward( epsilon - ) # Transform action as it was sampled from the policy distribution + ) # Transform action as it was sampled from the policy distribution. # Squash the action between (-1 and 1) pi_action = self._squash_bijector.forward(pi_action) # Compute logprob from Gaussian, and then apply correction for Tanh squashing. if with_logprob: - # Transform base_distribution to the policy distribution + # Transform base_distribution to the policy distribution. reparm_trick_bijector = tfp.bijectors.Chain( (self._squash_bijector, affine_bijector) ) @@ -150,7 +150,7 @@ def call(self, obs, deterministic=False, with_logprob=True): else: logp_pi = None - # Clamp the actions such that they are in range of the environment + # Clamp the actions such that they are in range of the environment. 
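# NOTE: Illustrative sketch, not part of this diff: a minimal NumPy version of
# mapping a tanh-squashed action from (-1, 1) onto the environment's action bounds,
# which is the kind of operation the `clamp` helper below performs (the real helper
# may differ in detail; bounds are made up for the example).
import numpy as np

def rescale_to_bounds(action, low, high):
    return low + 0.5 * (action + 1.0) * (high - low)

print(rescale_to_bounds(np.array([-1.0, 0.0, 1.0]), low=-2.0, high=2.0))  # [-2.  0.  2.]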
if self.act_limits is not None: pi_action = clamp( pi_action, diff --git a/stable_learning_control/control/algos/tf2/sac/sac.py b/stable_learning_control/control/algos/tf2/sac/sac.py index 0a5107b8f..9006bb28d 100644 --- a/stable_learning_control/control/algos/tf2/sac/sac.py +++ b/stable_learning_control/control/algos/tf2/sac/sac.py @@ -58,15 +58,15 @@ nn = import_tf(module_name="tensorflow.nn") Adam = import_tf(module_name="tensorflow.keras.optimizers", class_name="Adam") -# Import ray tuner if installed +# Import ray tuner if installed. tune = lazy_importer(module_name="ray.tune") -# Script settings +# Script settings. SCALE_LAMBDA_MIN_MAX = ( 0.0, 1.0, -) # Range of lambda lagrance multiplier -SCALE_ALPHA_MIN_MAX = (0.0, np.inf) # Range of alpha lagrance multiplier +) # Range of lambda lagrance multiplier. +SCALE_ALPHA_MIN_MAX = (0.0, np.inf) # Range of alpha lagrance multiplier. STD_OUT_LOG_VARS_DEFAULT = [ "Epoch", "TotalEnvInteracts", @@ -204,7 +204,7 @@ def __init__( # noqa: C901 } self._was_build = False - # Validate gymnasium env + # Validate gymnasium env. # NOTE: The current implementation only works with continuous spaces. if not is_gym_env(env): raise ValueError("Env must be a valid gymnasium environment.") @@ -240,7 +240,7 @@ def __init__( # noqa: C901 type="info", ) - # Store algorithm parameters + # Store algorithm parameters. self._act_dim = env.action_space.shape self._obs_dim = env.observation_space.shape self._device = set_device(device) @@ -257,7 +257,7 @@ def __init__( # noqa: C901 else: self._target_entropy = target_entropy - # Create variables for the Lagrance multipliers + # Create variables for the Lagrance multipliers. # NOTE: Clip at 1e-37 to prevent log_alpha/log_lambda from becoming -np.inf self.log_alpha = tf.Variable( tf.math.log(1e-37 if alpha < 1e-37 else alpha), name="log_alpha" @@ -277,7 +277,7 @@ def __init__( # noqa: C901 self._init_targets() - # Create optimizers + # Create optimizers. # NOTE: We here optimize for log_alpha instead of alpha because it is more # numerically stable (see: # https://github.com/rail-berkeley/softlearning/issues/136) @@ -354,7 +354,7 @@ def update(self, data): o_ ) # NOTE: Target actions coming from *current* policy - # Get target Q values based on optimization type + # Get target Q values based on optimization type. q1_pi_targ = self.ac_targ.Q1([o_, pi_]) q2_pi_targ = self.ac_targ.Q2([o_, pi_]) if self._opt_type.lower() == "minimize": @@ -365,12 +365,12 @@ def update(self, data): else: q_pi_targ = tf.math.minimum( q1_pi_targ, q2_pi_targ - ) # Use min clipping to prevent overestimation bias + ) # Use min clipping to prevent overestimation bias. q_backup = r + self._gamma * (1 - d) * (q_pi_targ - self.alpha * logp_pi_) # Compute the Q-Critic loss gradients with tf.GradientTape() as q_tape: - # Retrieve the current Q values + # Retrieve the current Q values. q1 = self.ac.Q1([o, a]) q2 = self.ac.Q2([o, a]) @@ -387,12 +387,12 @@ def update(self, data): ################################################ # Optimize Gaussian actor ###################### ################################################ - # Compute actor loss gradients + # Compute actor loss gradients. with tf.GradientTape() as a_tape: # Retrieve log probabilities of batch observations based on *current* policy pi, logp_pi = self.ac.pi(o) - # Retrieve current Q values + # Retrieve current Q values. 
# NOTE: Actions come from *current* policy q1_pi = self.ac.Q1([o, pi]) q2_pi = self.ac.Q2([o, pi]) @@ -420,9 +420,9 @@ def update(self, data): # Optimize alpha (Entropy temperature) ######### ################################################ if self._adaptive_temperature: - # Compute alpha loss gradients + # Compute alpha loss gradients. with tf.GradientTape() as alpha_tape: - # Calculate alpha loss + # Calculate alpha loss. alpha_loss = -tf.reduce_mean( self.alpha * tf.stop_gradient(logp_pi + self.target_entropy) ) # See Haarnoja eq. 17 @@ -468,7 +468,7 @@ def save(self, path, checkpoint_name="checkpoint"): except Exception as e: raise Exception("SAC model could not be saved.") from e - # Save additional information + # Save additional information. save_info = { "alg_name": self.__class__.__name__, "setup_kwargs": self._setup_kwargs, @@ -500,7 +500,7 @@ def restore(self, path, restore_lagrance_multipliers=False): "path and try again." ) - # Store initial values in order to ignore them when loading the weights + # Store initial values in order to ignore them when loading the weights. lr_a = self._lr_a.value() lr_alpha = self._lr_alpha.value() lr_c = self._lr_c.value() @@ -544,7 +544,7 @@ def export(self, path): obs_dummy = tf.random.uniform( combine_shapes(1, self._obs_dim), dtype=tf.float32 ) - self.ac.pi.get_action(obs_dummy) # Make sure the full graph was traced + self.ac.pi.get_action(obs_dummy) # Make sure the full graph was traced. self.ac.pi.save(osp.join(path, "tf2_save")) def build(self): @@ -858,7 +858,7 @@ def sac( # noqa: C901 env = env_fn() - # Validate gymnasium env + # Validate gymnasium env. # NOTE: The current implementation only works with continuous spaces. if not is_gym_env(env): raise ValueError("Env must be a valid gymnasium environment.") @@ -905,9 +905,9 @@ def sac( # noqa: C901 hyper_paramet_dict = { k: v for k, v in locals().items() if k not in ["logger"] } # Retrieve hyperparameters (Ignore logger object) - logger.save_config(hyper_paramet_dict) # Write hyperparameters to logger + logger.save_config(hyper_paramet_dict) # Write hyperparameters to logger. - # Retrieve max episode length + # Retrieve max episode length. if max_ep_len is None: max_ep_len = env.env._max_episode_steps else: @@ -928,7 +928,7 @@ def sac( # noqa: C901 # Get default actor critic if no 'actor_critic' was supplied actor_critic = SoftActorCritic if actor_critic is None else actor_critic - # Set random seed for reproducible results + # Set random seed for reproducible results. if seed is not None: os.environ["PYTHONHASHSEED"] = str(seed) os.environ["TF_CUDNN_DETERMINISTIC"] = "1" # new flag present in tf 2.0+ @@ -951,13 +951,13 @@ def sac( # noqa: C901 device, ) - # Create learning rate schedulers + # Create learning rate schedulers. # NOTE: Alpha currently uses the same scheduler as the actor. lr_decay_ref_var = total_steps if lr_decay_ref.lower() == "steps" else epochs lr_a_scheduler = get_lr_scheduler(lr_decay_type, lr_a, lr_a_final, lr_decay_ref_var) lr_c_scheduler = get_lr_scheduler(lr_decay_type, lr_c, lr_c_final, lr_decay_ref_var) - # Restore policy if supplied + # Restore policy if supplied. if start_policy is not None: logger.log(f"Restoring model from '{start_policy}'.", type="info") try: @@ -979,7 +979,7 @@ def sac( # noqa: C901 size=replay_size, ) - # Count variables and print network structure + # Count variables and print network structure. 
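# NOTE: Illustrative sketch, not part of this diff: a hedged sketch of what a
# count_vars-style helper boils down to for a Keras model (the example model below
# is made up and is not the package's actor-critic; the real helper may differ).
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(2, input_shape=(4,))])
print(model.count_params())  # 4 * 2 weights + 2 biases = 10.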
var_counts = tuple( count_vars(module) for module in [policy.ac.pi, policy.ac.Q1, policy.ac.Q2] ) @@ -992,7 +992,7 @@ def sac( # noqa: C901 logger.setup_tf_saver(policy) - # Setup diagnostics tb_write dict and store initial learning rates + # Setup diagnostics tb_write dict and store initial learning rates. diag_tb_log_list = ["LossQ", "LossPi", "Alpha", "LossAlpha", "Entropy"] if use_tensorboard: logger.log_to_tb( @@ -1027,7 +1027,7 @@ def sac( # noqa: C901 else: a = env.action_space.sample() - # Take step in the env + # Take step in the env. o_, r, d, truncated, _ = env.step(a) ep_ret += r ep_len += 1 @@ -1037,22 +1037,22 @@ def sac( # noqa: C901 # Make sure to update most recent observation! o = o_ - # End of trajectory handling + # End of trajectory handling. if d or truncated: logger.store(EpRet=ep_ret, EpLen=ep_len) o, _ = env.reset() ep_ret, ep_len = 0, 0 - # Update handling + # Update handling. if (t + 1) >= update_after and ((t + 1) - update_after) % update_every == 0: - # Step based learning rate decay + # Step based learning rate decay. if lr_decay_ref.lower() == "step": lr_a_now = max( lr_a_scheduler(t + 1), lr_a_final - ) # Make sure lr is bounded above final lr + ) # Make sure lr is bounded above final lr. lr_c_now = max( lr_c_scheduler(t + 1), lr_c_final - ) # Make sure lr is bounded above final lr + ) # Make sure lr is bounded above final lr. policy.set_learning_rates( lr_a=lr_a_now, lr_c=lr_c_now, lr_alpha=lr_a_now ) @@ -1060,9 +1060,9 @@ def sac( # noqa: C901 for _ in range(steps_per_update): batch = replay_buffer.sample_batch(batch_size) update_diagnostics = policy.update(data=batch) - logger.store(**update_diagnostics) # Log diagnostics + logger.store(**update_diagnostics) # Log diagnostics. - # SGD batch tb logging + # SGD batch tb logging. if use_tensorboard and not tb_low_log_freq: logger.log_to_tb(keys=diag_tb_log_list, global_step=t) @@ -1070,11 +1070,11 @@ def sac( # noqa: C901 if (t + 1) % steps_per_epoch == 0: epoch = (t + 1) // steps_per_epoch - # Save model + # Save model. if (epoch % save_freq == 0) or (epoch == epochs): logger.save_state({"env": env}, itr=epoch) - # Test the performance of the deterministic version of the agent + # Test the performance of the deterministic version of the agent. if num_test_episodes != 0: eps_ret, eps_len = test_agent( policy, test_env, num_test_episodes, max_ep_len=max_ep_len @@ -1085,19 +1085,19 @@ def sac( # noqa: C901 extend=True, ) - # Epoch based learning rate decay + # Epoch based learning rate decay. if lr_decay_ref.lower() != "step": lr_a_now = max( lr_a_scheduler(epoch), lr_a_final - ) # Make sure lr is bounded above final + ) # Make sure lr is bounded above final. lr_c_now = max( lr_c_scheduler(epoch), lr_c_final - ) # Make sure lr is bounded above final + ) # Make sure lr is bounded above final. policy.set_learning_rates( lr_a=lr_a_now, lr_c=lr_c_now, lr_alpha=lr_a_now ) - # Log performance measure to ray tuning + # Log performance measure to ray tuning. # NOTE: Only executed when the ray tuner invokes the script if hasattr(tune, "session") and tune.session._session is not None: mean_ep_ret = logger.get_stats("EpRet") @@ -1106,7 +1106,7 @@ def sac( # noqa: C901 mean_ep_ret=mean_ep_ret[0], epoch=epoch, mean_ep_len=mean_ep_len[0] ) - # Log info about epoch + # Log info about epoch. logger.log_tabular("Epoch", epoch) logger.log_tabular("TotalEnvInteracts", t) logger.log_tabular( @@ -1405,7 +1405,7 @@ def sac( # noqa: C901 ), ) - # Parse logger related arguments + # Parse logger related arguments. 
parser.add_argument( "--exp_name", type=str, @@ -1464,7 +1464,7 @@ def sac( # noqa: C901 ) args = parser.parse_args() - # Setup actor critic arguments + # Setup actor critic arguments. output_activation = {} output_activation["actor"] = safer_eval(args.act_out_a, backend="tf") output_activation["critic"] = safer_eval(args.act_out_c, backend="tf") @@ -1480,7 +1480,7 @@ def sac( # noqa: C901 output_activation=output_activation, ) - # Setup output dir for logger and return output kwargs + # Setup output dir for logger and return output kwargs. logger_kwargs = setup_logger_kwargs( args.exp_name, args.seed, diff --git a/stable_learning_control/control/common/buffers.py b/stable_learning_control/control/common/buffers.py index d8da36452..0b8fa8e8b 100644 --- a/stable_learning_control/control/common/buffers.py +++ b/stable_learning_control/control/common/buffers.py @@ -172,7 +172,7 @@ def __init__( type="warning", ) - # Main buffers + # Main buffers. self.obs_buf = atleast_2d( np.zeros(combine_shapes(size, obs_dim), dtype=np.float32).squeeze() ) @@ -187,13 +187,13 @@ def __init__( ).squeeze() self.done_buf = np.zeros(int(size), dtype=np.float32) - # Optional buffers + # Optional buffers. self.adv_buf = np.zeros(size, dtype=np.float32) self.ret_buf = np.zeros(size, dtype=np.float32) self.val_buf = np.zeros(size, dtype=np.float32) self.logp_buf = np.zeros(size, dtype=np.float32) - # Store buffer attributes + # Store buffer attributes. self.ptr, self.traj_ptr, self.n_traj, self._max_size = 0, 0, 0, size self.traj_ptrs = [] self.traj_lengths = [] @@ -216,9 +216,9 @@ def store(self, obs, act, rew, next_obs, done, val=None, logp=None): # noqa: C9 logp (numpy.ndarray, optional): The log probabilities of the actions. Defaults to ``None``. """ - assert self.ptr < self._max_size # buffer has to have room so you can store + assert self.ptr < self._max_size # buffer has to have room so you can store. - # Fill primary buffer + # Fill primary buffer. try: self.obs_buf[self.ptr] = obs self.obs_next_buf[self.ptr] = next_obs @@ -254,7 +254,7 @@ def store(self, obs, act, rew, next_obs, done, val=None, logp=None): # noqa: C9 ) raise ValueError(error_msg) - # Fill optional buffer + # Fill optional buffer. if val: try: self.val_buf[self.ptr] = val @@ -276,7 +276,7 @@ def store(self, obs, act, rew, next_obs, done, val=None, logp=None): # noqa: C9 raise ValueError(error_msg) self._contains_logp = True - # Increase buffer pointers + # Increase buffer pointers. self.ptr += 1 def finish_path(self, last_val=0): @@ -300,7 +300,7 @@ def finish_path(self, last_val=0): # Calculate the advantage and rewards-to-go if buffer contains vals if self._contains_vals: - # Get the current trajectory + # Get the current trajectory. path_slice = slice(self.traj_ptr, self.ptr) rews = np.append(self.rew_buf[path_slice], last_val) vals = np.append(self.val_buf[path_slice], last_val) @@ -312,7 +312,7 @@ def finish_path(self, last_val=0): # the next line computes rewards-to-go, to be targets for the value function self.ret_buf[path_slice] = discount_cumsum(rews, self._gamma)[:-1] - # Store trajectory length and update trajectory pointers + # Store trajectory length and update trajectory pointers. self.traj_lengths.append(self.ptr - self.traj_ptr) self.traj_ptrs.append(self.traj_ptr) self.traj_ptr = self.ptr @@ -337,10 +337,10 @@ def get(self, flat=False): # noqa: C901 Returns: dict: The trajectory buffer. """ - if not self._preempt: # Check if buffer was full + if not self._preempt: # Check if buffer was full. 
assert self.ptr == self._max_size - # Remove incomplete trajectories + # Remove incomplete trajectories. if not self._incomplete and self.traj_ptr != self.ptr: if not self._incomplete_warn: log_to_std_out( @@ -353,7 +353,7 @@ def get(self, flat=False): # noqa: C901 else: buffer_end_ptr = self.ptr - # Remove trajectories that are to short + # Remove trajectories that are to short. if self.traj_lengths[-1] < self._min_traj_size: if not self._min_traj_size_warn: log_to_std_out( @@ -367,7 +367,7 @@ def get(self, flat=False): # noqa: C901 buffer_end_ptr = self.traj_ptr - self.traj_lengths[-1] self.traj_lengths = self.traj_lengths[:-1] - # Create trajectory buffer dictionary + # Create trajectory buffer dictionary. buff_slice = slice(0, buffer_end_ptr) if flat: data = dict( @@ -396,9 +396,9 @@ def get(self, flat=False): # noqa: C901 if self._contains_logp: data["lopg"] = np.split(self.logp_buf[buff_slice], self.traj_ptrs[1:]) - # Reset buffer and traj indexes + # Reset buffer and traj indexes. self.ptr, self.traj_ptr, self.traj_ptrs, self.n_traj = 0, 0, [], 0 self.traj_lengths = [] - # Return experience tuple + # Return experience tuple. return data diff --git a/stable_learning_control/control/common/helpers.py b/stable_learning_control/control/common/helpers.py index d857bdfdd..639e18e8d 100644 --- a/stable_learning_control/control/common/helpers.py +++ b/stable_learning_control/control/common/helpers.py @@ -74,7 +74,7 @@ def get_activation_function(activation_fn_name, backend="torch"): else: backend_prefix = ["torch", "nn"] - # Retrieve activation function + # Retrieve activation function. if len(activation_fn_name.split(".")) == 1: activation_fn_name = ".".join(backend_prefix) + "." + activation_fn_name elif len(activation_fn_name.split(".")) == 2: diff --git a/stable_learning_control/control/utils/eval_robustness.py b/stable_learning_control/control/utils/eval_robustness.py index 4c93d497a..7fe06a703 100644 --- a/stable_learning_control/control/utils/eval_robustness.py +++ b/stable_learning_control/control/utils/eval_robustness.py @@ -170,7 +170,7 @@ def run_disturbed_policy( # noqa: C901 verbose_fmt="table", output_dir=output_dir, output_fname="eval_statistics.csv" ) - # Increase action space + # Increase action space. # NOTE: Needed to prevent the disturbance from being clipped by the action space. env.unwrapped.action_space.high = np.array( [np.finfo(np.float32).max for item in env.unwrapped.action_space.high] @@ -179,7 +179,7 @@ def run_disturbed_policy( # noqa: C901 [np.finfo(np.float32).min for item in env.unwrapped.action_space.low] ) - # Increase max episode length if requested + # Increase max episode length if requested. if max_ep_len is None: max_ep_len = env._max_episode_steps else: @@ -195,7 +195,7 @@ def run_disturbed_policy( # noqa: C901 ) env._max_episode_steps = max_ep_len - # Try to retrieve default type and variant if not supplied + # Try to retrieve default type and variant if not supplied. if disturbance_type is None: if hasattr(env.unwrapped, "_disturber_cfg"): if "default_type" in env.unwrapped._disturber_cfg.keys(): @@ -247,14 +247,14 @@ def run_disturbed_policy( # noqa: C901 env.disturbance_info["type"] if hasattr(env, "disturbance_info") and "type" in env.disturbance_info.keys() else disturbance_type - ) # Retrieve used disturbance type + ) # Retrieve used disturbance type. 
disturbance_variant = ( env.disturbance_info["variant"] if hasattr(env, "disturbance_info") and "variant" in env.disturbance_info.keys() else disturbance_variant - ) # Retrieve used disturbance variant + ) # Retrieve used disturbance variant. - # Loop though all disturbances till disturber is done + # Loop though all disturbances till disturber is done. logger.log("Starting robustness evaluation...", type="info") render_error = False path = { @@ -272,11 +272,11 @@ def run_disturbed_policy( # noqa: C901 ) = ([], [], [], []) n_disturbance = 0 soi_found, ref_found = True, True - supports_deterministic = True # Only supported with gaussian algorithms + supports_deterministic = True # Only supported with gaussian algorithms. while not env.disturber_done: o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0 while n < num_episodes: - # Render env if requested + # Render env if requested. if render and not render_error: try: env.render() @@ -292,7 +292,7 @@ def run_disturbed_policy( # noqa: C901 type="warning", ) - # Retrieve action + # Retrieve action. if deterministic and supports_deterministic: try: a = policy.get_action(o, deterministic=deterministic) @@ -343,7 +343,7 @@ def run_disturbed_policy( # noqa: C901 type="warning", ) - # Store performance measurements + # Store performance measurements. if d or (ep_len == max_ep_len): died = ep_len < max_ep_len logger.store(EpRet=ep_ret, EpLen=ep_len, DeathRate=(float(died))) @@ -352,24 +352,24 @@ def run_disturbed_policy( # noqa: C901 % (n, ep_ret, ep_len, died) ) - # Store observations + # Store observations. o_episode_df = pd.DataFrame(path["o"]) o_episode_df.insert(0, "step", range(0, ep_len)) o_episode_df = pd.melt( o_episode_df, id_vars="step", var_name="observation", - ) # Flatten dataframe + ) # Flatten dataframe. o_episodes_dfs.append(o_episode_df) - # Store episode rewards + # Store episode rewards. r_episode_df = pd.DataFrame( {"step": range(0, ep_len), "reward": path["r"]} ) r_episode_df.insert(len(r_episode_df.columns), "episode", n) r_episodes_dfs.append(r_episode_df) - # Store states of interest + # Store states of interest. if soi_found: soi_episode_df = pd.DataFrame(path["state_of_interest"]) soi_episode_df.insert(0, "step", range(0, ep_len)) @@ -378,10 +378,10 @@ def run_disturbed_policy( # noqa: C901 id_vars="step", var_name="state_of_interest", value_name="error", - ) # Flatten dataframe + ) # Flatten dataframe. soi_episodes_dfs.append(soi_episode_df) - # Store reference + # Store reference. if ref_found: ref_episode_df = pd.DataFrame(path["reference"]) ref_episode_df.insert(0, "step", range(0, ep_len)) @@ -389,10 +389,10 @@ def run_disturbed_policy( # noqa: C901 ref_episode_df, id_vars="step", var_name="reference", - ) # Flatten dataframe + ) # Flatten dataframe. ref_episodes_dfs.append(ref_episode_df) - # Increment counters and reset storage variables + # Increment counters and reset storage variables. n += 1 o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0 path = { @@ -402,7 +402,7 @@ def run_disturbed_policy( # noqa: C901 "state_of_interest": [], } - # Print robustness evaluation diagnostics + # Print robustness evaluation diagnostics. if hasattr(env, "disturbance_info") and "type" in env.disturbance_info.keys(): logger.log_tabular( "DisturbanceType", @@ -441,7 +441,7 @@ def run_disturbed_policy( # noqa: C901 log_to_std_out("") logger.dump_tabular() - # Add extra disturbance information to the robustness eval dataframe + # Add extra disturbance information to the robustness eval dataframe. 
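# NOTE: Illustrative sketch, not part of this diff: the pandas pattern used above
# and below in its smallest form, i.e. flatten per-episode observations with
# pd.melt and tag the result with a disturbance label (all values made up).
import pandas as pd

o_episode_df = pd.DataFrame([[0.1, 0.2], [0.3, 0.4]])  # One row per step, one column per observation.
o_episode_df.insert(0, "step", range(0, 2))
flat = pd.melt(o_episode_df, id_vars="step", var_name="observation")  # Long format: step/observation/value.
flat.insert(len(flat.columns), "disturbance", "impulse: 100 N")       # Made-up label for the robustness plots.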
disturbance_label = ( env.disturbance_info["label"] if ( @@ -483,7 +483,7 @@ def run_disturbed_policy( # noqa: C901 ) ref_disturbances_dfs.append(ref_disturbance_df) - # Reset storage buckets and go to next disturbance + # Reset storage buckets and go to next disturbance. o_episodes_dfs = [] r_episodes_dfs = [] soi_episodes_dfs = [] @@ -491,7 +491,7 @@ def run_disturbed_policy( # noqa: C901 env.next_disturbance() n_disturbance += 1 - # Merge robustness evaluation information for all disturbances + # Merge robustness evaluation information for all disturbances. o_disturbances_df = pd.concat(o_disturbances_dfs, ignore_index=True) r_disturbances_df = pd.concat(r_disturbances_dfs, ignore_index=True) soi_disturbances_df = pd.concat(soi_disturbances_dfs, ignore_index=True) @@ -526,7 +526,7 @@ def run_disturbed_policy( # noqa: C901 disturbance_variant, ) - # Save robustness evaluation dataframe and return it to the user + # Save robustness evaluation dataframe and return it to the user. if save_result: results_path = logger.output_dir.joinpath("results.csv") logger.log( @@ -583,7 +583,7 @@ def plot_robustness_results( # noqa: C901 else None ) - # Unpack required data from dataframe + # Unpack required data from dataframe. obs_found, rew_found, soi_found, ref_found = True, True, True, True o_disturbances_df, ref_disturbances_df = pd.DataFrame(), pd.DataFrame() if "observation" in dataframe["variable"].unique(): @@ -611,13 +611,13 @@ def plot_robustness_results( # noqa: C901 else: ref_found = False - # Merge observations and references + # Merge observations and references. if obs_found: obs_df_tmp = o_disturbances_df.copy(deep=True) obs_df_tmp["signal"] = "obs_" + (obs_df_tmp["observation"] + 1).astype(str) obs_df_tmp.insert(len(obs_df_tmp.columns), "type", "observation") - # Retrieve the requested observations + # Retrieve the requested observations. observations = validate_observations(observations, o_disturbances_df) observations = [obs - 1 for obs in observations] # Humans count from 1 obs_df_tmp = obs_df_tmp.query(f"observation in {observations}") @@ -627,7 +627,7 @@ def plot_robustness_results( # noqa: C901 ref_df_tmp.insert(len(ref_df_tmp.columns), "type", "reference") obs_ref_df = pd.concat([obs_df_tmp, ref_df_tmp], ignore_index=True) - # Loop though all disturbances and plot the observations and references in one plot + # Loop though all disturbances and plot the observations and references in one plot. fig_title = "{} under several {}{}.".format( "Observation and reference" if all([obs_found, ref_found]) @@ -640,7 +640,7 @@ def plot_robustness_results( # noqa: C901 obs_ref_df.loc[obs_ref_df["disturbance_index"] == 0, "disturbance"] = ( obs_ref_df.loc[obs_ref_df["disturbance_index"] == 0, "disturbance"] + " (original)" - ) # Append original to original value + ) # Append original to original value. if not merged: num_plots = len(obs_ref_df.disturbance.unique()) total_cols = 3 @@ -676,7 +676,7 @@ def plot_robustness_results( # noqa: C901 ).set_title(fig_title) figs["observations"].append(fig) - # Plot mean cost + # Plot mean cost. if rew_found: fig = plt.figure(tight_layout=True) figs["costs"].append(fig) @@ -687,7 +687,7 @@ def plot_robustness_results( # noqa: C901 r_disturbances_df["disturbance_index"] == 0, "disturbance" ] + " (original)" - ) # Append original to original value + ) # Append original to original value. 
sns.lineplot( data=r_disturbances_df, x="step", y="reward", ci="sd", hue="disturbance" ).set_title( @@ -707,7 +707,7 @@ def plot_robustness_results( # noqa: C901 type="warning", ) - # Plot states of interest + # Plot states of interest. if soi_found: n_soi = soi_disturbances_df["state_of_interest"].max() + 1 soi_disturbances_df.loc[ @@ -717,7 +717,7 @@ def plot_robustness_results( # noqa: C901 soi_disturbances_df["disturbance_index"] == 0, "disturbance" ] + " (original)" - ) # Append original to original value + ) # Append original to original value. for index in range(0, n_soi): fig = plt.figure(tight_layout=True) figs["states_of_interest"].append(fig) @@ -746,7 +746,7 @@ def plot_robustness_results( # noqa: C901 type="warning", ) - # Save plots + # Save plots. if save_figs: figs_path = output_dir.joinpath("figures") figs_extension = figs_fmt[1:] if figs_fmt.startswith(".") else figs_fmt @@ -901,10 +901,10 @@ def plot_robustness_results( # noqa: C901 ) args = parser.parse_args() - # Load policy and environment + # Load policy and environment. env, policy = load_policy_and_env(args.fpath, args.itr if args.itr >= 0 else "last") - # List d_type or d_variant if requested + # List d_type or d_variant if requested. if args.list_disturbance_types or args.list_disturbance_variants: if hasattr(env.unwrapped, "_disturber_cfg"): if args.list_disturbance_types: @@ -981,11 +981,11 @@ def plot_robustness_results( # noqa: C901 log_to_std_out(friendly_err(error_msg)) sys.exit() - # Retrieve output_dir + # Retrieve output_dir. if not args.data_dir: args.data_dir = args.fpath - # Perform robustness evaluation + # Perform robustness evaluation. run_results_df = run_disturbed_policy( env, policy, diff --git a/stable_learning_control/control/utils/eval_utils.py b/stable_learning_control/control/utils/eval_utils.py index 2d4304740..4e685d83b 100644 --- a/stable_learning_control/control/utils/eval_utils.py +++ b/stable_learning_control/control/utils/eval_utils.py @@ -23,7 +23,7 @@ def test_agent(policy, env, num_episodes, max_ep_len=None): o, _ = env.reset() d, truncated, ep_ret, ep_len = False, False, 0, 0 while not (d or truncated): - # Take deterministic actions at test time + # Take deterministic actions at test time. o, r, d, truncated, _ = env.step(policy.get_action(o, True)) ep_ret += r ep_len += 1 diff --git a/stable_learning_control/control/utils/gym_utils.py b/stable_learning_control/control/utils/gym_utils.py index f13d40b4a..3062417e5 100644 --- a/stable_learning_control/control/utils/gym_utils.py +++ b/stable_learning_control/control/utils/gym_utils.py @@ -67,7 +67,7 @@ def validate_gym_env(arg_dict): AssertError: Raised when a environment is supplied that is not a valid gymnasium environment. """ - # Import gymnasium environments + # Import gymnasium environments. # import gymnasium as gym # Import environment configuration file. This file can be used to inject diff --git a/stable_learning_control/control/utils/plot.py b/stable_learning_control/control/utils/plot.py index 89252c5fb..03890c028 100644 --- a/stable_learning_control/control/utils/plot.py +++ b/stable_learning_control/control/utils/plot.py @@ -195,18 +195,18 @@ def get_all_datasets(all_logdirs, legend=None, select=None, exclude=None): if exclude is not None: logdirs = [log for log in logdirs if all(not (x in log) for x in exclude)] - # Verify logdirs + # Verify logdirs. 
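# NOTE: Illustrative sketch, not part of this diff: the exclude-substring filtering
# applied to the log directories just above, with made-up directory names.
logdirs = ["data/lac_oscillator/seed0", "data/sac_cartpole/seed0", "data/lac_cartpole/seed1"]
exclude = ["cartpole"]
logdirs = [log for log in logdirs if all(x not in log for x in exclude)]
print(logdirs)  # ['data/lac_oscillator/seed0']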
print("Plotting from...\n" + "=" * DIV_LINE_WIDTH + "\n") for logdir in logdirs: print(logdir) print("\n" + "=" * DIV_LINE_WIDTH) - # Make sure the legend is compatible with the logdirs + # Make sure the legend is compatible with the logdirs. assert not (legend) or (len(legend) == len(logdirs)), friendly_err( "Must give a legend title for each set of experiments." ) - # Load data from logdirs + # Load data from logdirs. data = [] if legend: for log, leg in zip(logdirs, legend): diff --git a/stable_learning_control/control/utils/run_utils.py b/stable_learning_control/control/utils/run_utils.py index acd96554f..ec34d0582 100644 --- a/stable_learning_control/control/utils/run_utils.py +++ b/stable_learning_control/control/utils/run_utils.py @@ -71,10 +71,10 @@ def call_experiment( datestamp (bool): Whether a datestamp should be added to the experiment name. kwargs: All kwargs to pass to thunk. """ - # Determine number of CPU cores to run on + # Determine number of CPU cores to run on. num_cpu = psutil.cpu_count(logical=False) if num_cpu == "auto" else num_cpu - # Send random seed to thunk + # Send random seed to thunk. kwargs["seed"] = seed # Be friendly and print out your kwargs, so we all know what's up @@ -85,7 +85,7 @@ def call_experiment( print(json.dumps(kwargs_json, separators=(",", ":\t"), indent=4, sort_keys=True)) print("\n") - # Set up logger output directory + # Set up logger output directory. if "logger_kwargs" not in kwargs: kwargs["logger_kwargs"] = setup_logger_kwargs( exp_name, seed, data_dir, datestamp @@ -101,7 +101,7 @@ def call_experiment( **kwargs["logger_kwargs"], ) - # Force algorithm default if verbose_fmt is line + # Force algorithm default if verbose_fmt is line. # NOTE: Done since otherwise the stdout gets cluttered. if kwargs["logger_kwargs"]["verbose_fmt"] == "line": kwargs["logger_kwargs"]["verbose_vars"] = None @@ -110,7 +110,7 @@ def thunk_plus(): """Setup environment used in the experiment.""" # Make 'env_fn' from 'env_name' if "env_name" in kwargs: - # Import gymnasium environments + # Import gymnasium environments. import gymnasium as gym # Import environment configuration file. This file can be used to inject @@ -129,13 +129,13 @@ def thunk_plus(): env_kwargs = kwargs.pop("env_kwargs", {}) kwargs["env_fn"] = lambda: gym.make(env_name, **env_kwargs) - # Fork into multiple processes + # Fork into multiple processes. mpi_fork(num_cpu) - # Run thunk + # Run thunk. thunk(**kwargs) - # Prepare to launch a script to run the experiment + # Prepare to launch a script to run the experiment. pickled_thunk = cloudpickle.dumps(thunk_plus) encoded_thunk = base64.b64encode(zlib.compress(pickled_thunk)).decode("utf-8") @@ -374,10 +374,10 @@ def get_val(v, k): param_name = sh if sh is not None else k param_name = valid_str(param_name) - # Get variant value for parameter k + # Get variant value for parameter k. variant_val = get_val(variant, k) - # Append to name + # Append to name. if all_bools(v): # If this is a param which only takes boolean values, # only include in the name if it's True for this variant. diff --git a/stable_learning_control/control/utils/safer_eval.py b/stable_learning_control/control/utils/safer_eval.py index e0a1bad31..15cae9707 100644 --- a/stable_learning_control/control/utils/safer_eval.py +++ b/stable_learning_control/control/utils/safer_eval.py @@ -11,7 +11,7 @@ .. 
autofunction:: safer_eval """ # NOTE: Manual autofunction request was added because of bug https://github.com/sphinx-doc/sphinx/issues/7912#issuecomment-786011464 # noqa:E501 -# Import modules to which you want users to have access +# Import modules to which you want users to have access. import torch # noqa: F401 import stable_learning_control as stable_learning_control # noqa: F401 @@ -34,7 +34,7 @@ def safer_eval(*args, backend=None): args: The eval return values. """ - # Import the nn module based on the backend type + # Import the nn module based on the backend type. # NOTE: This was done to enable users to specify `nn.relu` instead of # `torch.nn.ReLu`. if backend is not None and backend.lower() in ["torch", "pytorch"]: diff --git a/stable_learning_control/control/utils/test_policy.py b/stable_learning_control/control/utils/test_policy.py index c30a80bfb..7dd4c7b06 100644 --- a/stable_learning_control/control/utils/test_policy.py +++ b/stable_learning_control/control/utils/test_policy.py @@ -143,7 +143,7 @@ def load_policy_and_env(fpath, itr="last"): ) ) - # Retrieve model path and backend + # Retrieve model path and backend. fpath, backend = _retrieve_model_folder(fpath) if itr != "last": @@ -152,7 +152,7 @@ def load_policy_and_env(fpath, itr="last"): ) itr = "%d" % itr - # try to load environment from save + # try to load environment from save. # NOTE: Sometimes this will fail because the environment could not be pickled. try: state = joblib.load(Path(fpath).parent.joinpath("vars.pkl")) @@ -169,7 +169,7 @@ def load_policy_and_env(fpath, itr="last"): ) ) from e - # load the get_action function + # load the get_action function. try: if backend == "tf": policy = load_tf_policy(fpath, env=env, itr=itr) @@ -205,11 +205,11 @@ def load_tf_policy(fpath, env, itr="last"): model_path = _retrieve_iter_folder(fpath, itr) else: model_path = fpath - tf = import_tf() # Import tf if installed otherwise throw warning + tf = import_tf() # Import tf if installed otherwise throw warning. print("\n") log_to_std_out("Loading model from '%s'.\n\n" % fpath, type="info") - # Retrieve get_action method + # Retrieve get_action method. save_info = load_from_json(Path(fpath).joinpath("save_info.json")) import stable_learning_control.control.algos.tf2 as tf2_algos @@ -218,7 +218,7 @@ def load_tf_policy(fpath, env, itr="last"): except KeyError: ac_kwargs = {} model = getattr(tf2_algos, save_info["alg_name"])(env=env, **ac_kwargs) - latest = tf.train.latest_checkpoint(model_path) # Restore latest checkpoint + latest = tf.train.latest_checkpoint(model_path) # Restore latest checkpoint. model.load_weights(latest) return model @@ -246,7 +246,7 @@ def load_pytorch_policy(fpath, env, itr="last"): print("\n") log_to_std_out("Loading model from '%s'.\n\n" % model_file, type="info") - # Retrieve get_action method + # Retrieve get_action method. save_info = load_from_json(Path(fpath).joinpath("save_info.json")) import stable_learning_control.control.algos.pytorch as torch_algos @@ -256,7 +256,7 @@ def load_pytorch_policy(fpath, env, itr="last"): except KeyError: ac_kwargs = {} model = getattr(torch_algos, save_info["alg_name"])(env=env, **ac_kwargs) - model.load_state_dict(model_data) # Retore model parameters + model.load_state_dict(model_data) # Retore model parameters. 
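# NOTE: Illustrative sketch, not part of this diff: the state_dict round trip the
# loader above relies on, in its smallest form (model and path are made up).
import torch
import torch.nn as nn

model = nn.Linear(4, 2)
torch.save(model.state_dict(), "/tmp/model_state.pt")        # Save the parameters only.
restored = nn.Linear(4, 2)                                   # Same architecture is required.
restored.load_state_dict(torch.load("/tmp/model_state.pt"))  # Restore the parameters.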
return model @@ -290,10 +290,10 @@ def run_policy( logger = EpochLogger(verbose_fmt="table") o, r, d, ep_ret, ep_len, n = env.reset(), 0, False, 0, 0, 0 - supports_deterministic = True # Only supported with gaussian algorithms + supports_deterministic = True # Only supported with gaussian algorithms. render_error = False while n < num_episodes: - # Render env if requested + # Render env if requested. if render and not render_error: try: env.render() @@ -308,7 +308,7 @@ def run_policy( type="warning", ) - # Retrieve action + # Retrieve action. if deterministic and supports_deterministic: try: a = policy.get_action(o, deterministic=deterministic) @@ -324,7 +324,7 @@ def run_policy( else: a = policy.get_action(o) - # Perform action in the environment and store result + # Perform action in the environment and store result. o, r, d, _ = env.step(a) ep_ret += r ep_len += 1 diff --git a/stable_learning_control/env_config.py b/stable_learning_control/env_config.py index c589ca44c..9b1101b97 100644 --- a/stable_learning_control/env_config.py +++ b/stable_learning_control/env_config.py @@ -11,7 +11,7 @@ .. code-block:: python - # Import environments you want to use + # Import environments you want to use. import custom_environment_1 import custom_environment_2 """ # noqa: E501 diff --git a/stable_learning_control/run.py b/stable_learning_control/run.py index 612b4c066..7e14b0f90 100644 --- a/stable_learning_control/run.py +++ b/stable_learning_control/run.py @@ -90,9 +90,9 @@ def _parse_exp_cfg(cmd_line_args): # noqa: C901 if "--exp_cfg" in cmd_line_args: cfg_error = False exp_cfg_idx = cmd_line_args.index("--exp_cfg") - cmd_line_args.pop(exp_cfg_idx) # Remove exp_cfg argument + cmd_line_args.pop(exp_cfg_idx) # Remove exp_cfg argument. - # Validate config path + # Validate config path. try: exp_cfg_file_path = cmd_line_args.pop(exp_cfg_idx) exp_cfg_file_path = ( @@ -128,9 +128,9 @@ def _parse_exp_cfg(cmd_line_args): # noqa: C901 type="warning", ) - # Read configuration values + # Read configuration values. if not cfg_error: - # Load exp config + # Load exp config. with open(exp_cfg_file_path) as stream: try: exp_cfg_params = yaml.safe_load(stream) @@ -142,7 +142,7 @@ def _parse_exp_cfg(cmd_line_args): # noqa: C901 type="warning", ) - # Retrieve values from exp config + # Retrieve values from exp config. log_to_std_out( f"Experiment hyperparameters loaded from '{exp_cfg_file_path}'", type="info", @@ -154,14 +154,14 @@ def _parse_exp_cfg(cmd_line_args): # noqa: C901 type="warning", ) else: - # Retrieve algorithm if not supplied by user + # Retrieve algorithm if not supplied by user. if exp_cfg_idx == 1: if "alg_name" in exp_cfg_params.keys(): cmd_line_args.insert(1, exp_cfg_params.pop("alg_name", None)) else: exp_cfg_params.pop("alg_name") - # Append cfg hyperparameters to input arguments + # Append cfg hyperparameters to input arguments. # NOTE: Here we assume comma or space separated strings to be variants. exp_cfg_params = { (key if key.startswith("--") else "--" + key): val @@ -257,7 +257,7 @@ def _parse_and_execute_grid_search(cmd, args): # noqa: C901 """ cmd, backend = _add_backend_to_cmd(cmd) - # warning + # warning. algo = safer_eval("stable_learning_control.control." + cmd, backend=backend) # Before all else, check to see if any of the flags is 'help'. @@ -309,12 +309,12 @@ def _parse_and_execute_grid_search(cmd, args): # noqa: C901 # These special shortcuts are described by SUBSTITUTIONS. 
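# NOTE: Illustrative sketch, not part of this diff: a small standalone version of the
# shorthand-to-true-name swap performed below; the mapping and arguments here are
# made up for the example and are not the package's real SUBSTITUTIONS table.
SUBSTITUTIONS = {"env": "env_name", "hid": "ac_kwargs:hidden_sizes"}
arg_dict = {"env": ["CartPole-v1"], "epochs": [50]}

for special_name, true_name in SUBSTITUTIONS.items():
    if special_name in arg_dict:
        arg_dict[true_name] = arg_dict.pop(special_name)  # Swap it in the arg dict.

print(arg_dict)  # {'epochs': [50], 'env_name': ['CartPole-v1']}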
for special_name, true_name in SUBSTITUTIONS.items(): if special_name in arg_dict: - # swap it in arg dict + # swap it in arg dict. arg_dict[true_name] = arg_dict[special_name] del arg_dict[special_name] if special_name in given_shorthands: - # point the shortcut to the right name + # point the shortcut to the right name. given_shorthands[true_name] = given_shorthands[special_name] del given_shorthands[special_name] diff --git a/stable_learning_control/user_config.py b/stable_learning_control/user_config.py index b6344b03a..6ec57d5bd 100644 --- a/stable_learning_control/user_config.py +++ b/stable_learning_control/user_config.py @@ -8,8 +8,7 @@ import os.path as osp -# Default neural network backend for each algo -# (Must be either 'tf1' or 'pytorch') +# Default neural network backend for each algo (Must be either 'tf1' or 'pytorch'). DEFAULT_BACKEND = { "lac": "pytorch", "sac": "pytorch", @@ -25,11 +24,11 @@ # Whether GridSearch provides automatically-generated default shorthands: DEFAULT_SHORTHAND = True -# Tells the GridSearch how many seconds to pause for before launching +# Tells the GridSearch how many seconds to pause for before launching. # experiments. WAIT_BEFORE_LAUNCH = 5 -# Logger std out output type +# Logger std out output type. # NOTE:The format in which the statistics are displayed to the terminal. Options are # "table" which supplies them as a table and "line" which prints them in one line DEFAULT_STD_OUT_TYPE = "line" diff --git a/stable_learning_control/utils/log_utils/helpers.py b/stable_learning_control/utils/log_utils/helpers.py index 72fed0323..42b4f830b 100644 --- a/stable_learning_control/utils/log_utils/helpers.py +++ b/stable_learning_control/utils/log_utils/helpers.py @@ -57,7 +57,7 @@ def colorize(string, color, bold=False, highlight=False): Returns: str: Colorized string. """ - if color: # If not empty + if color: # If not empty. return gym_colorize(string, color, bold, highlight) else: return string @@ -172,10 +172,10 @@ def setup_logger_kwargs( A dict containing output_dir and exp_name. """ - # Datestamp forcing + # Datestamp forcing. datestamp = datestamp or FORCE_DATESTAMP - # Make base path + # Make base path. ymd_time = time.strftime("%Y-%m-%d_") if datestamp else "" relpath = "".join([ymd_time, exp_name]) diff --git a/stable_learning_control/utils/log_utils/logx.py b/stable_learning_control/utils/log_utils/logx.py index 44e754eea..6a6d6bf09 100644 --- a/stable_learning_control/utils/log_utils/logx.py +++ b/stable_learning_control/utils/log_utils/logx.py @@ -89,7 +89,7 @@ def __init__( exp_name (str): Experiment name. """ if proc_id() == 0: - # Parse output_fname to see if csv was requested + # Parse output_fname to see if csv was requested. extension = osp.splitext(output_fname)[1] self._output_csv = True if extension.lower() == ".csv" else False @@ -130,10 +130,10 @@ def __init__( self.tb_writer = None self._tabular_to_tb_dict = ( dict() - ) # Stores whether tabular is logged to tensorboard when dump_tabular is called + ) # Stores if tabular is logged to tensorboard when dump_tabular is called. self._step_count_dict = ( dict() - ) # Used for keeping count of the current global step + ) # Used for keeping count of the current global step. def log( self, @@ -185,7 +185,7 @@ def log_to_tb(self, key, val, tb_prefix=None, tb_alias=None, global_step=None): global_step (int, optional): Global step value to record. Uses internal step counter if global step is not supplied. 
""" - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. var_name = tb_alias if tb_alias is not None else key var_name = tb_prefix + "/" + var_name if tb_prefix is not None else var_name self._write_to_tb(var_name, val, global_step=global_step) @@ -258,7 +258,7 @@ def dump_tabular(self, global_step=None): # noqa: C901 print_keys = [] print_vals = [] - # Retrieve data from current row + # Retrieve data from current row. for key in self._log_headers: val = self._log_current_row.get(key, "") valstr = ( @@ -271,12 +271,12 @@ def dump_tabular(self, global_step=None): # noqa: C901 print_dict[key] = valstr vals.append(val) - # Log to stdout + # Log to stdout. if self.verbose: if self.verbose_vars: key_filter = self.verbose_vars - # Make sure Epcoh and EnvInteract are always shown if present + # Make sure Epcoh and EnvInteract are always shown if present. for item in reversed(["Epoch", "TotalEnvInteracts"]): if item not in key_filter and item in print_keys: key_filter.insert(0, item) @@ -313,7 +313,7 @@ def dump_tabular(self, global_step=None): # noqa: C901 ] ) self.log(print_str) - else: # Increase epoch steps and time on the same line + else: # Increase epoch steps and time on the same line. self.log( "\r{}: {:8.3G}, {}: {:8.3g}, {}: {:8.3G} s".format( "Epoch", @@ -326,18 +326,18 @@ def dump_tabular(self, global_step=None): # noqa: C901 end="", ) - # Log to file + # Log to file. if self.output_file is not None: if self._first_row: self.output_file.write("\t".join(self._log_headers) + "\n") self.output_file.write("\t".join(map(str, vals)) + "\n") self.output_file.flush() - # Write tabular to tensorboard log + # Write tabular to tensorboard log. for key in self._log_headers: if self._tabular_to_tb_dict[key]["tb_write"]: val = self._log_current_row.get(key, "") - # Use internal counter if global_step is None + # Use internal counter if global_step is None. if global_step is None: if key in self._log_headers: global_step = self._global_step @@ -457,7 +457,7 @@ def load_env(cls, env_path): load_path = load_path[0] else: load_path = env_path - # try to load environment from save + # try to load environment from save. # NOTE: Sometimes this will fail because the environment could not be pickled. try: state = joblib.load(load_path) @@ -530,7 +530,7 @@ def save_state(self, state_dict, itr=None): except (ValueError, pickle.PicklingError): self.log("Warning: could not pickle state_dict.", color="red") - # Save model state + # Save model state. if hasattr(self, "tf_saver_elements"): backend_folder_name = "tf2_save" self._tf_save(itr) @@ -538,7 +538,7 @@ def save_state(self, state_dict, itr=None): backend_folder_name = "torch_save" self._pytorch_save(itr) - # Save checkpoint state + # Save checkpoint state. if self._save_checkpoints and itr is not None: itr_name = ( "iter%d" % itr @@ -563,7 +563,7 @@ def setup_tf_saver(self, what_to_save): Tensorflow models. """ global tf - tf = import_tf() # Import tf if installed otherwise throw warning + tf = import_tf() # Import tf if installed otherwise throw warning. self.tf_saver_elements = what_to_save self.log("Policy will be saved to '{}'.\n".format(self.output_dir), type="info") @@ -596,12 +596,12 @@ def _tf_save(self, itr=None): self, "tf_saver_elements" ), "First have to setup saving with self.setup_tf_saver" - # Create filename + # Create filename. 
fpath = osp.join(self.output_dir, "tf2_save") fname = osp.join(fpath, "weights_checkpoint") os.makedirs(fpath, exist_ok=True) - # Create Checkpoints name + # Create Checkpoints name. if self._save_checkpoints and itr is not None: itr_name = ( "iter%d" % itr @@ -612,7 +612,7 @@ def _tf_save(self, itr=None): cname = osp.join(cpath, "weights_checkpoint") os.makedirs(cpath, exist_ok=True) - # Save additional algorithm information + # Save additional algorithm information. if not self._save_info_saved: save_info = { "alg_name": self.tf_saver_elements.__class__.__name__, @@ -626,7 +626,7 @@ def _tf_save(self, itr=None): ) self._save_info_saved = True - # Save model + # Save model. if isinstance(self.tf_saver_elements, tf.keras.Model) or hasattr( self.tf_saver_elements, "save_weights" ): @@ -634,7 +634,7 @@ def _tf_save(self, itr=None): else: self.log(save_fail_warning, type="warning") - # Save checkpoint + # Save checkpoint. if self._save_checkpoints and itr is not None: if isinstance(self.tf_saver_elements, tf.keras.Model) or hasattr( self.tf_saver_elements, "save_weights" ): @@ -643,7 +643,7 @@ def _tf_save(self, itr=None): else: self.log(save_fail_warning, type="warning") - self._checkpoint += 1 # Increase epoch + self._checkpoint += 1 # Increase epoch. def _pytorch_save(self, itr=None): """Saves the PyTorch model/models using their ``state_dict``. @@ -664,12 +664,12 @@ def _pytorch_save(self, itr=None): self, "pytorch_saver_elements" ), "First have to setup saving with self.setup_pytorch_saver" - # Create filename + # Create filename. fpath = osp.join(self.output_dir, "torch_save") fname = osp.join(fpath, "model_state.pt") os.makedirs(fpath, exist_ok=True) - # Create Checkpoints Name + # Create Checkpoints Name. if self._save_checkpoints and itr is not None: itr_name = ( "iter%d" % itr @@ -680,7 +680,7 @@ def _pytorch_save(self, itr=None): cname = osp.join(cpath, "model_state.pt") os.makedirs(cpath, exist_ok=True) - # Save additional algorithm information + # Save additional algorithm information. if not self._save_info_saved: save_info = { "alg_name": self.pytorch_saver_elements.__class__.__name__, @@ -696,7 +696,7 @@ def _pytorch_save(self, itr=None): ) self._save_info_saved = True - # Save model + # Save model. if isinstance(self.pytorch_saver_elements, torch.nn.Module) or hasattr( self.pytorch_saver_elements, "state_dict" ): @@ -704,7 +704,7 @@ def _pytorch_save(self, itr=None): else: self.log(save_fail_warning, type="warning") - # Save checkpoint + # Save checkpoint. if self._save_checkpoints: if isinstance(self.pytorch_saver_elements, torch.nn.Module) or hasattr( self.pytorch_saver_elements, "state_dict" ): @@ -713,7 +713,7 @@ def _pytorch_save(self, itr=None): else: self.log(save_fail_warning, type="warning") - self._checkpoint += 1 # Increase epoch + self._checkpoint += 1 # Increase epoch. def _write_to_tb(self, var_name, data, global_step=None): """Writes data to tensorboard log file. @@ -729,13 +729,13 @@ def _write_to_tb(self, var_name, data, global_step=None): counter if global step is not supplied. """ - # Try to write data to tb as as historgram + # Try to write data to tb as a histogram. if not self.tb_writer: self.use_tensorboard = ( - True # Property that creates tf writer if set to True + True # Property that creates tf writer if set to True. ) - if is_scalar(data): # Extra protection since trying to write a list freezes tb - try: # Try to write as scalar + if is_scalar(data): # Extra protection since trying to write a list freezes tb. + try: # Try to write as scalar.
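The `_pytorch_save` hunks above boil down to the standard PyTorch `state_dict` save/restore pattern, which is also why the method checks for a `state_dict` attribute before attempting the save. A minimal standalone sketch of that pattern (the `nn.Linear` module and the bare file name are made-up stand-ins, not the logger's actual `torch_save/model_state.pt` layout):

import torch
import torch.nn as nn

# Hypothetical stand-in for whatever object is registered with the PyTorch saver.
model = nn.Linear(4, 2)

# Persist only the parameters (what a state_dict holds), not the whole object.
torch.save(model.state_dict(), "model_state.pt")

# Restoring requires rebuilding the same architecture first, then loading weights.
restored = nn.Linear(4, 2)
restored.load_state_dict(torch.load("model_state.pt"))
restored.eval()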
self.add_scalar(var_name, data, global_step=global_step) except ( AssertionError, @@ -746,7 +746,7 @@ def _write_to_tb(self, var_name, data, global_step=None): ): pass else: - # Try to write data to tb as as historgram + # Try to write data to tb as a histogram. try: self.add_histogram(var_name, data, global_step=global_step) except ( @@ -758,7 +758,7 @@ def _write_to_tb(self, var_name, data, global_step=None): ): pass - # Try to write data as image + # Try to write data as image. try: self.add_image(var_name, data, global_step=global_step) except ( @@ -801,11 +801,11 @@ def use_tensorboard(self, value): """ self._use_tensorboard = value - # Create tensorboard writer if use_tensorboard == True else delete - if self._use_tensorboard and not self.tb_writer: # Create writer object + # Create tensorboard writer if use_tensorboard == True else delete. + if self._use_tensorboard and not self.tb_writer: # Create writer object. if self._use_tf_backend: self.log("Using Tensorflow as the Tensorboard backend.", type="info") - tf = import_tf() # Import tf if installed otherwise throw warning + tf = import_tf() # Import tf if installed otherwise throw warning. self.tb_writer = tf.summary.create_file_writer(self.output_dir) else: self.log( @@ -818,10 +818,10 @@ def use_tensorboard(self, value): comment=f"{exp_name.upper()}-data_" + time.strftime("%Y%m%d-%H%M%S"), ) - atexit.register(self.tb_writer.close) # Make sure the writer is closed - elif not self._use_tensorboard and self.tb_writer: # Delete tensorboard writer - self.tb_writer.close() # Close writer - atexit.unregister(self.tb_writer.close) # Make sure the writer is closed + atexit.register(self.tb_writer.close) # Make sure the writer is closed. + elif not self._use_tensorboard and self.tb_writer: # Delete tensorboard writer. + self.tb_writer.close() # Close writer. + atexit.unregister(self.tb_writer.close) # Make sure the writer is closed. self.tb_writer = None @property @@ -849,7 +849,7 @@ def add_hparams(self, *args, **kwargs): NotImplementedError: Raised if you try to call this method when using the Tensorflow backend. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. if self._use_tf_backend: raise NotImplementedError( "The 'add_hparams' method is not available when using the 'tensorflow' " @@ -869,7 +869,7 @@ def add_scalar(self, *args, **kwargs): *args: All args to pass to the Summary/SummaryWriter object. **kwargs: All kwargs to pass to the Summary/SummaryWriter object. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. if self._use_tf_backend: kwargs["step"] = kwargs.pop("global_step") global tf @@ -903,7 +903,7 @@ def add_scalars(self, *args, **kwargs): NotImplementedError: Raised if you try to call this method when using the Tensorflow backend. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. if self._use_tf_backend: raise NotImplementedError( "The 'add_scalars' method is not available when using the 'tensorflow' " @@ -924,7 +924,7 @@ def add_histogram(self, *args, **kwargs): *args: All args to pass to the Summary/SummaryWriter object. **kwargs: All kwargs to pass to the Summary/SummaryWriter object. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists.
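For context on the `add_scalar`/`add_histogram` wrappers touched above: they forward to a TensorBoard summary writer, which for the non-TensorFlow backend is typically `torch.utils.tensorboard.SummaryWriter`. A rough standalone sketch of those underlying calls (the log directory and tags here are made up for illustration, not the logger's defaults):

import numpy as np
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir="runs/demo")  # Hypothetical log directory.
for step in range(10):
    # Scalars show up as line plots in TensorBoard, keyed by tag and global_step.
    writer.add_scalar("loss", 1.0 / (step + 1), global_step=step)
# Array-like data is summarised as a histogram per step.
writer.add_histogram("weights", np.random.randn(100), global_step=9)
writer.close()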
if self._use_tf_backend: kwargs["step"] = kwargs.pop("global_step") global tf @@ -959,7 +959,7 @@ def add_histogram_raw(self, *args, **kwargs): NotImplementedError: Raised if you try to call this method when using the Tensorflow backend. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. if self._use_tf_backend: raise NotImplementedError( "The 'add_histogram_raw' method is not available when using the " @@ -979,7 +979,7 @@ def add_image(self, *args, **kwargs): *args: All args to pass to the Summary/SummaryWriter object. **kwargs: All kwargs to pass to the Summary/SummaryWriter object. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. if self._use_tf_backend: kwargs["step"] = kwargs.pop("global_step") global tf @@ -1013,7 +1013,7 @@ def add_images(self, *args, **kwargs): NotImplementedError: Raised if you try to call this method when using the Tensorflow backend. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. if self._use_tf_backend: raise NotImplementedError( "The 'add_images' method is not available when using the 'tensorflow' " @@ -1037,7 +1037,7 @@ def add_image_with_boxes(self, *args, **kwargs): NotImplementedError: Raised if you try to call this method when using the Tensorflow backend. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. if self._use_tf_backend: raise NotImplementedError( "The 'add_image_with_boxes' method is not available when using the " @@ -1061,7 +1061,7 @@ def add_figure(self, *args, **kwargs): NotImplementedError: Raised if you try to call this method when using the Tensorflow backend. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. if self._use_tf_backend: raise NotImplementedError( "The 'add_figure' method is not available when using the 'tensorflow' " @@ -1084,7 +1084,7 @@ def add_video(self, *args, **kwargs): NotImplementedError: Raised if you try to call this method when using the Tensorflow backend. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. if self._use_tf_backend: raise NotImplementedError( "The 'add_video' method is not available when using the 'tensorflow' " @@ -1104,7 +1104,7 @@ def add_audio(self, *args, **kwargs): *args: All args to pass to the Summary/SummaryWriter object. **kwargs: All kwargs to pass to the Summary/SummaryWriter object. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. if self._use_tf_backend: kwargs["step"] = kwargs.pop("global_step") global tf @@ -1136,7 +1136,7 @@ def add_text(self, *args, **kwargs): *args: All args to pass to the Summary/SummaryWriter object. **kwargs: All kwargs to pass to the Summary/SummaryWriter object. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. if self._use_tf_backend: kwargs["step"] = kwargs.pop("global_step") global tf @@ -1171,7 +1171,7 @@ def add_onnx_graph(self, *args, **kwargs): NotImplementedError: Raised if you try to call this method when using the Tensorflow backend. 
""" - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. if self._use_tf_backend: raise NotImplementedError( "The 'add_onnx_graph' method is not available when using the " @@ -1194,7 +1194,7 @@ def add_graph(self, *args, **kwargs): NotImplementedError: Raised if you try to call this method when using the Tensorflow backend. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. if self._use_tf_backend: raise NotImplementedError( "The 'add_graph' method is not available when using the 'tensorflow' " @@ -1218,7 +1218,7 @@ def add_embedding(self, *args, **kwargs): NotImplementedError: Raised if you try to call this method when using the Tensorflow backend. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. if self._use_tf_backend: raise NotImplementedError( "The 'add_embedding' method is not available when using the " @@ -1241,7 +1241,7 @@ def add_pr_curve(self, *args, **kwargs): NotImplementedError: Raised if you try to call this method when using the Tensorflow backend. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. if self._use_tf_backend: raise NotImplementedError( "The 'add_pr_curve' method is not available when using the " @@ -1264,7 +1264,7 @@ def add_pr_curve_raw(self, *args, **kwargs): NotImplementedError: Raised if you try to call this method when using the Tensorflow backend. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. if self._use_tf_backend: raise NotImplementedError( "The 'add_pr_curve_raw' method is not available when using the " @@ -1290,7 +1290,7 @@ def add_custom_scalars_multilinechart(self, *args, **kwargs): NotImplementedError: Raised if you try to call this method when using the Tensorflow backend. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. if self._use_tf_backend: raise NotImplementedError( "The 'add_custom_scalars_multilinechart' method is not available " @@ -1316,7 +1316,7 @@ def add_custom_scalars_marginchart(self, *args, **kwargs): NotImplementedError: Raised if you try to call this method when using the Tensorflow backend. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. if self._use_tf_backend: raise NotImplementedError( "The 'add_custom_scalars_marginchart' method is not available when " @@ -1346,7 +1346,7 @@ def add_custom_scalars(self, *args, **kwargs): NotImplementedError: Raised if you try to call this method when using the Tensorflow backend. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. if self._use_tf_backend: raise NotImplementedError( "The 'add_custom_scalars' method is not available when using the " @@ -1370,7 +1370,7 @@ def add_mesh(self, *args, **kwargs): NotImplementedError: Raised if you try to call this method when using the Tensorflow backend. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. 
if self._use_tf_backend: raise NotImplementedError( "The 'add_mesh' method is not available when using the 'tensorflow' " @@ -1392,7 +1392,7 @@ def flush(self, *args, **kwargs): *args: All args to pass to the Summary/SummaryWriter object. **kwargs: All kwargs to pass to the Summary/SummaryWriter object. """ - self.use_tensorboard = True # Make sure SummaryWriter exists + self.use_tensorboard = True # Make sure SummaryWriter exists. if self._use_tf_backend: global tf with self.tb_writer.as_default(): @@ -1583,7 +1583,7 @@ def store( counter if global step is not supplied. """ for k, v in kwargs.items(): - # Store variable values in epoch_dict and increase global step count + # Store variable values in epoch_dict and increase global step count. if not (k in self.epoch_dict.keys()): self.epoch_dict[k] = [] self._step_count_dict[k] = 0 @@ -1592,19 +1592,19 @@ def store( else: self.epoch_dict[k].append(v) - # Increase the step count for all the keys + # Increase the step count for all the keys. # NOTE: This is done in such a way that two values of a given key do not - # get the same global step value assigned to them + # get the same global step value assigned to them. self._step_count_dict[k] = ( self._step_count_dict[k] + 1 if self._step_count_dict[k] + 1 >= self._global_step else self._global_step ) - # Check if a alias was given for the current parameter + # Check if an alias was given for the current parameter. var_name = k if k not in tb_aliases.keys() else tb_aliases[k] - # Write variable value to tensorboard + # Write variable value to tensorboard. tb_write_key = ( (tb_write[k] if k in tb_write.keys() else False) if isinstance(tb_write, dict) @@ -1613,7 +1613,7 @@ def store( if tb_write_key: global_step = ( global_step if global_step is not None else self._global_step - ) # Use internal counter if global_step is None + ) # Use internal counter if global_step is None. self._write_to_tb(var_name, v, global_step=global_step) def log_to_tb( @@ -1646,7 +1646,7 @@ def log_to_tb( global_step (int, optional): Global step value to record. Uses internal step counter if global step is not supplied. """ - if val is not None: # When key and value are supplied use direct write + if val is not None: # When key and value are supplied use direct write. super().log_to_tb( keys, val, @@ -1654,10 +1654,10 @@ def log_to_tb( tb_alias=tb_alias, global_step=global_step, ) - else: # When only keys are supplied use internal storage + else: # When only keys are supplied use internal storage. keys = [keys] if not isinstance(keys, list) else keys for key in keys: - if global_step is None: # Retrieve global step if not supplied + if global_step is None: # Retrieve global step if not supplied. if self._n_table_dumps >= 1: global_step_tmp = self._global_step elif key in self.epoch_dict.keys(): @@ -1767,7 +1767,7 @@ def dump_tabular(self, *args, **kwargs): self._n_table_dumps += 1 self._tb_index_dict = { key: 0 for key in self._tb_index_dict.keys() - } # Reset tensorboard logging index storage dict + } # Reset tensorboard logging index storage dict. def get_stats(self, key): """Lets an algorithm ask the logger for mean/std/min/max of a diagnostic.
diff --git a/stable_learning_control/utils/mpi_utils/mpi_pytorch.py b/stable_learning_control/utils/mpi_utils/mpi_pytorch.py index 1f4a6b985..8e9fa89ed 100644 --- a/stable_learning_control/utils/mpi_utils/mpi_pytorch.py +++ b/stable_learning_control/utils/mpi_utils/mpi_pytorch.py @@ -39,15 +39,15 @@ def mpi_avg_grads(module): if num_procs() == 1: return - # Sync torch module parameters + # Sync torch module parameters. if hasattr(module, "parameters"): for p in module.parameters(): - # Sync network grads + # Sync network grads. p_grad_numpy = p.grad.numpy() avg_p_grad = mpi_avg(p.grad) p_grad_numpy[:] = avg_p_grad[:] elif isinstance(module, torch.Tensor): - # Sync network grads + # Sync network grads. p_grad_numpy = module.grad.numpy() avg_p_grad = mpi_avg(module.grad) if isinstance(avg_p_grad, list): @@ -72,19 +72,19 @@ def sync_params(module): if num_procs() == 1: return - # Sync torch module parameters + # Sync torch module parameters. if hasattr(module, "parameters"): - # Sync network parameters + # Sync network parameters. for p in module.parameters(): p_numpy = p.data.numpy() broadcast(p_numpy) elif isinstance(module, torch.Tensor): - # Sync pytorch parameter + # Sync pytorch parameter. p_numpy = module.data.numpy() broadcast(p_numpy) return elif isinstance(module, np.ndarray): - # Sync numpy parameters + # Sync numpy parameters. broadcast(module) else: raise TypeError( diff --git a/stable_learning_control/utils/mpi_utils/mpi_tools.py b/stable_learning_control/utils/mpi_utils/mpi_tools.py index 867febc92..c10d7eddd 100644 --- a/stable_learning_control/utils/mpi_utils/mpi_tools.py +++ b/stable_learning_control/utils/mpi_utils/mpi_tools.py @@ -154,7 +154,7 @@ def mpi_statistics_scalar(x, with_min_and_max=False): mean = global_sum / global_n global_sum_sq = mpi_sum(np.sum((x - mean) ** 2)) - std = np.sqrt(global_sum_sq / global_n) # compute global std + std = np.sqrt(global_sum_sq / global_n) # compute global std. if with_min_and_max: global_min = mpi_op(np.min(x) if len(x) > 0 else np.inf, op=MPI.MIN)
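The `mpi_avg_grads`/`sync_params`/`mpi_statistics_scalar` hunks above all rest on the same collective pattern: sum a buffer element-wise over all ranks, then divide by the number of processes. A minimal standalone `mpi4py` sketch of that averaging step (the array is a made-up stand-in for a gradient buffer; run with something like `mpirun -n 4 python demo.py`):

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
local = np.random.randn(3)                 # Stand-in for this rank's gradient buffer.
summed = np.empty_like(local)
comm.Allreduce(local, summed, op=MPI.SUM)  # Element-wise sum across every rank.
averaged = summed / comm.Get_size()        # Average = sum / number of processes.
print(f"rank {comm.Get_rank()}: {averaged}")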