[pre-commit.ci] pre-commit autoupdate (#35)
* [pre-commit.ci] pre-commit autoupdate

updates:
- [github.com/pre-commit/mirrors-mypy: v1.4.0 → v1.4.1](https://github.com/pre-commit/mirrors-mypy/compare/v1.4.0...v1.4.1)

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* update commit hooks

* add __version__ to __init__.py

* rename df variables to be more descriptive

* use typing.NamedTuple over collections.namedtuple

* clean up ruff ignore

* drop https://github.com/nbQA-dev/nbQA, use ruff's beta Jupyter support instead

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Janosh Riebesell <[email protected]>
pre-commit-ci[bot] and janosh authored Aug 16, 2023
1 parent ec3ecba commit 1ace02f
Showing 11 changed files with 67 additions and 64 deletions.
12 changes: 3 additions & 9 deletions .pre-commit-config.yaml
@@ -7,18 +7,18 @@ default_install_hook_types: [pre-commit, commit-msg]
 
 repos:
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.0.275
+    rev: v0.0.284
     hooks:
       - id: ruff
         args: [--fix]
 
   - repo: https://github.com/psf/black
-    rev: 23.3.0
+    rev: 23.7.0
     hooks:
       - id: black-jupyter
 
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.4.0
+    rev: v1.5.0
     hooks:
       - id: mypy
 
@@ -41,12 +41,6 @@ repos:
         stages: [commit, commit-msg]
         exclude_types: [jupyter, bib]
 
-  - repo: https://github.com/nbQA-dev/nbQA
-    rev: 1.7.0
-    hooks:
-      - id: nbqa-ruff
-        args: [--fix]
-
   - repo: https://github.com/kynan/nbstripout
     rev: 0.6.1
     hooks:
2 changes: 1 addition & 1 deletion examples/functorch_mlp_ensemble.ipynb
@@ -522,7 +522,7 @@
 }
 ],
 "source": [
-"for metric in (\"accuracy\", \"loss\"):\n",
+"for metric in (\"accuracy\", \"loss\"):  # noqa: B007\n",
 "    !tb-reducer {writer.log_dir}/training_{metric}* \\\n",
 "        --outpath {writer.log_dir}/training_{metric} \\\n",
 "        --reduce-ops mean,std,min,max \\\n",
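The new `# noqa: B007` suppresses flake8-bugbear's unused-loop-variable rule, which ruff now applies to notebooks directly with nbQA gone. It fires here because `metric` is only referenced inside the `!tb-reducer` shell magic, which ruff cannot see as Python. A toy sketch of what B007 normally flags (hypothetical code, not from this repo):

```python
# B007 fires when the loop variable is never read in the loop body:
for metric in ("accuracy", "loss"):  # ruff would flag `metric` here
    print("reducing...")  # body never uses `metric`
```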
9 changes: 4 additions & 5 deletions examples/wandb_integration.ipynb
@@ -281,8 +281,7 @@
 "        out = self.layer1(x)\n",
 "        out = self.layer2(out)\n",
 "        out = out.reshape(out.size(0), -1)\n",
-"        out = self.fc(out)\n",
-"        return out"
+"        return self.fc(out)"
 ]
 },
 {
@@ -853,13 +852,13 @@
 ],
 "source": [
 "dict_of_df = {k: pd.DataFrame(v) for k, v in reduced_runs.items()}\n",
-"df = pd.concat(dict_of_df, axis=1)\n",
+"df_reduced = pd.concat(dict_of_df, axis=1)\n",
 "\n",
 "plt.rc(\"font\", size=16)\n",
 "\n",
 "_, axs = plt.subplots(1, 2, figsize=(14, 5))\n",
-"df.filter(like=\"loss\").plot(ax=axs[0])\n",
-"df.filter(like=\"accuracy\").plot(ax=axs[1])"
+"df_reduced.filter(like=\"loss\").plot(ax=axs[0])\n",
+"df_reduced.filter(like=\"accuracy\").plot(ax=axs[1])"
 ]
 },
 {
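Renaming `df` to `df_reduced` matches the removal of the `PD901` (pandas-df-variable-name) entry from the ignore list in pyproject.toml below. The plotting pattern itself deserves a self-contained sketch (made-up run data, hypothetical values):

```python
import pandas as pd

# Concatenating a dict of DataFrames along axis=1 produces MultiIndex
# columns keyed by the dict keys, so .filter(like="loss") selects
# every column whose label mentions "loss".
reduced_runs = {
    "mean": {"loss": [0.9, 0.5], "accuracy": [0.4, 0.7]},
    "max": {"loss": [1.1, 0.6], "accuracy": [0.5, 0.8]},
}
dict_of_df = {k: pd.DataFrame(v) for k, v in reduced_runs.items()}
df_reduced = pd.concat(dict_of_df, axis=1)
print(df_reduced.filter(like="loss"))
```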
20 changes: 8 additions & 12 deletions pyproject.toml
@@ -64,6 +64,7 @@ warn_unused_ignores = true
 
 [tool.ruff]
 target-version = "py38"
+include = ["**/pyproject.toml", "*.ipynb", "*.py", "*.pyi"]
 select = [
     "B", # flake8-bugbear
     "C4", # flake8-comprehensions
@@ -95,19 +96,14 @@ select = [
     "YTT", # flake8-2020
 ]
 ignore = [
-    "D100", # Missing docstring in public module
-    "D104", # Missing docstring in public package
-    "D205", # 1 blank line required between summary line and description
-    "N802", # invalid-function-name
-    "N806", # non-lowercase-variable-in-function
-    "PD901", # pandas-df-variable-name
-    "PLC0414", # useless-import-alias
-    "PLR", # pylint refactor
-    "PT006", # pytest-parametrize-names-wrong-type
-    "PT013", # pytest-incorrect-pytest-import
+    "D100", # Missing docstring in public module
+    "D205", # 1 blank line required between summary line and description
+    "PLR", # pylint refactor
+    "PT006", # pytest-parametrize-names-wrong-type
 ]
 pydocstyle.convention = "google"
 
 [tool.ruff.per-file-ignores]
-"tests/*" = ["D103"]
-"examples/*" = ["D102", "D103", "D107", "E402"]
+"tests/*" = ["D103", "D104"]
+"__init__.py" = ["F401"]
+"examples/*" = ["D102", "D103", "D107", "E402", "FA102"]
22 changes: 16 additions & 6 deletions tensorboard_reducer/__init__.py
@@ -1,6 +1,16 @@
-from tensorboard_reducer.load import load_tb_events as load_tb_events
-from tensorboard_reducer.main import main as main
-from tensorboard_reducer.reduce import reduce_events as reduce_events
-from tensorboard_reducer.write import write_data_file as write_data_file
-from tensorboard_reducer.write import write_df as write_df
-from tensorboard_reducer.write import write_tb_events as write_tb_events
+"""TensorBoard Reducer package.
+
+Author: Janosh Riebesell (2021-04-04)
+"""
+
+from importlib.metadata import PackageNotFoundError, version
+
+from tensorboard_reducer.load import load_tb_events
+from tensorboard_reducer.main import main
+from tensorboard_reducer.reduce import reduce_events
+from tensorboard_reducer.write import write_data_file, write_df, write_tb_events
+
+try:  # noqa: SIM105
+    __version__ = version("tensorboard-reducer")
+except PackageNotFoundError:
+    pass  # package not installed
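With `__version__` wired to package metadata, the version is available at runtime without a hard-coded string; the `try/except` keeps imports working from an uninstalled source checkout. Expected usage, as a sketch:

```python
import tensorboard_reducer as tbr

# Reads the version of the installed tensorboard-reducer distribution
# via importlib.metadata (only set when the package is installed).
print(tbr.__version__)
```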
11 changes: 8 additions & 3 deletions tensorboard_reducer/event_loader.py
@@ -1,8 +1,7 @@
 from __future__ import annotations
 
 import threading
-from collections import namedtuple
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, NamedTuple
 
 from tensorboard.backend.event_processing import (
     directory_watcher,
@@ -14,7 +13,13 @@
 if TYPE_CHECKING:
     from tensorboard.compat.proto.event_pb2 import Event
 
-ScalarEvent = namedtuple("ScalarEvent", ["wall_time", "step", "value"])
+
+class ScalarEvent(NamedTuple):
+    """A logged scalar value."""
+
+    wall_time: float
+    step: int
+    value: float
 
 
 class EventAccumulator:
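`typing.NamedTuple` keeps the runtime behavior of the old `collections.namedtuple` while adding field types that mypy (also bumped by this commit) can check. A quick sketch with made-up values:

```python
# Construction, attribute access, and tuple unpacking work as before:
event = ScalarEvent(wall_time=1692144000.0, step=10, value=0.25)
assert event.step == 10
wall_time, step, value = event  # still a plain tuple underneath
```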
24 changes: 12 additions & 12 deletions tensorboard_reducer/load.py
@@ -103,10 +103,10 @@ def load_tb_events(
 
     for tag in accumulator.scalar_tags:
         # accumulator.Scalars() returns columns 'step', 'wall_time', 'value'
-        df = pd.DataFrame(accumulator.Scalars(tag)).set_index("step")
-        df = df.drop(columns="wall_time")
+        df_scalar = pd.DataFrame(accumulator.Scalars(tag)).set_index("step")
+        df_scalar = df_scalar.drop(columns="wall_time")
 
-        if handle_dup_steps is None and not df.index.is_unique:
+        if handle_dup_steps is None and not df_scalar.index.is_unique:
             raise ValueError(
                 f"Tag '{tag}' from run directory '{in_dir}' contains duplicate "
                 "steps. Please make sure your data wasn't corrupted. If this is "
@@ -118,12 +118,12 @@
                 "or take their mean."
             )
         if handle_dup_steps == "mean":
-            df = df.groupby(df.index).mean()
+            df_scalar = df_scalar.groupby(df_scalar.index).mean()
         elif handle_dup_steps in ("keep-first", "keep-last"):
             keep = handle_dup_steps.replace("keep-", "")
-            df = df[~df.index.duplicated(keep=keep)]
+            df_scalar = df_scalar[~df_scalar.index.duplicated(keep=keep)]
 
-        load_dict[tag].append(df)
+        load_dict[tag].append(df_scalar)
 
     # Safety check: make sure all loaded runs have equal numbers of steps for each tag
     # unless user set strict_steps=False.
@@ -162,10 +162,10 @@
         # That is, we retain all steps as long as any run recorded a value for it.
         # Only makes a difference if strict_steps=False and different runs have
         # non-overlapping steps.
-        df = pd.concat(lst, join="outer", axis=1)
+        df_scalar = pd.concat(lst, join="outer", axis=1)
         # count(axis=1) returns the number of non-NaN values in each row
-        df = df[df.count(axis=1) >= min_runs_per_step]
-        out_dict[key] = df
+        df_scalar = df_scalar[df_scalar.count(axis=1) >= min_runs_per_step]
+        out_dict[key] = df_scalar
 
     else:
         # join='inner' means keep only the intersection of indices from all joined
@@ -179,7 +179,7 @@
     if verbose:
         n_tags = len(out_dict)
         if strict_steps and strict_tags:
-            n_steps, n_events = list(out_dict.values())[0].shape
+            n_steps, n_events = next(iter(out_dict.values())).shape
             print(
                 f"Loaded {n_events} TensorBoard runs with {n_tags} scalars "
                 f"and {n_steps} steps each"
@@ -190,8 +190,8 @@
             )
 
         for tag in list(out_dict)[:50]:
-            df = out_dict[tag]
-            print(f"- '{tag}': {df.shape}")
+            df_scalar = out_dict[tag]
+            print(f"- '{tag}': {df_scalar.shape}")
         if len(out_dict) > 50:
             print("...")
 
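The `df` → `df_scalar` rename is mechanical, but the duplicate-step logic it touches is easy to illustrate. A toy sketch of the three `handle_dup_steps` strategies on an index with a repeated step (illustrative data only):

```python
import pandas as pd

df_scalar = pd.DataFrame({"value": [1.0, 3.0, 5.0]}, index=[0, 0, 1])

# "mean": average duplicate steps -> step 0 becomes 2.0
print(df_scalar.groupby(df_scalar.index).mean())

# "keep-first" / "keep-last": one row per step, picked by position
print(df_scalar[~df_scalar.index.duplicated(keep="first")])  # step 0 -> 1.0
print(df_scalar[~df_scalar.index.duplicated(keep="last")])   # step 0 -> 3.0
```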
12 changes: 6 additions & 6 deletions tensorboard_reducer/write.py
@@ -173,18 +173,18 @@ def write_data_file(
     # create multi-index dataframe from event data with reduce op names as 1st-level col
     # names and tag names as 2nd level
     dict_of_dfs = {op: pd.DataFrame(dic) for op, dic in data_to_write.items()}
-    df = pd.concat(dict_of_dfs, axis=1)
-    df.columns = df.columns.swaplevel(0, 1)
-    df.index.name = "step"
+    df_out = pd.concat(dict_of_dfs, axis=1)
+    df_out.columns = df_out.columns.swaplevel(0, 1)
+    df_out.index.name = "step"
 
     # let pandas handle compression inference from extensions (.csv.gz, .json.bz2, etc.)
     basename = os.path.basename(out_path)
     if ".csv" in basename.lower():
-        df.to_csv(out_path)
+        df_out.to_csv(out_path)
     elif ".json" in basename.lower():
-        df.to_json(out_path)
+        df_out.to_json(out_path)
     elif ".xlsx" in out_path.lower():
-        df.to_excel(out_path)
+        df_out.to_excel(out_path)
     else:
         raise ValueError(
             f"{out_path=} has unknown extension, should be one of {_known_extensions} "
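The dispatch above only picks the writer method; pandas itself infers any compression codec from the final suffix, which is why `.csv.gz` and friends need no extra handling. A sketch with hypothetical file names:

```python
import pandas as pd

df_out = pd.DataFrame({"mean": [0.1, 0.2]}, index=pd.Index([0, 1], name="step"))
df_out.to_csv("reduced.csv")     # plain CSV
df_out.to_csv("reduced.csv.gz")  # gzip-compressed, codec inferred from .gz
```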
3 changes: 1 addition & 2 deletions tests/test_main.py
@@ -5,7 +5,6 @@
 from typing import TYPE_CHECKING
 
 import pytest
-from pytest import CaptureFixture
 
 from tensorboard_reducer import main
 
@@ -81,7 +80,7 @@ def test_main_lax_csv_output(tmp_path: Path) -> None:
 
 
 @pytest.mark.parametrize("arg", ["-v", "--version"])
-def test_main_report_version(capsys: CaptureFixture[str], arg: str) -> None:
+def test_main_report_version(capsys: pytest.CaptureFixture[str], arg: str) -> None:
     """Test CLI version flag."""
     with pytest.raises(SystemExit) as exc_info:
         main([arg])
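Accessing `CaptureFixture` through the `pytest` namespace satisfies `PT013` (pytest-incorrect-pytest-import), which this commit removes from the ruff ignore list. Minimal usage sketch (hypothetical test):

```python
import pytest


def test_greets(capsys: pytest.CaptureFixture[str]) -> None:
    print("hello")
    stdout, _stderr = capsys.readouterr()
    assert stdout == "hello\n"
```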
4 changes: 2 additions & 2 deletions tests/test_reduce.py
@@ -15,8 +15,8 @@ def generate_sample_data(
     events_dict = {}
     for idx in range(n_tags):
         data = np.random.random((n_steps, n_runs))
-        df = pd.DataFrame(data, columns=[f"run_{j}" for j in range(n_runs)])
-        events_dict[f"tag_{idx}"] = df
+        df_rand = pd.DataFrame(data, columns=[f"run_{j}" for j in range(n_runs)])
+        events_dict[f"tag_{idx}"] = df_rand
     return events_dict
 
 
12 changes: 6 additions & 6 deletions tests/test_write.py
@@ -73,20 +73,20 @@ def test_write_data_file(
     tbr.write_data_file(reduced_events, file_path, verbose=verbose)
 
     if ".csv" in extension:
-        df = pd.read_csv(file_path, header=[0, 1], index_col=0)
+        df_actual = pd.read_csv(file_path, header=[0, 1], index_col=0)
     elif ".json" in extension:
-        df = pd.read_json(file_path)
-        df.columns = map(ast.literal_eval, df.columns)
+        df_actual = pd.read_json(file_path)
+        df_actual.columns = map(ast.literal_eval, df_actual.columns)
     elif ".xlsx" in extension:
-        df = pd.read_excel(file_path, header=[0, 1], index_col=0)
+        df_actual = pd.read_excel(file_path, header=[0, 1], index_col=0)
 
     reduce_ops = list(reduced_events)
     tag_name = list(reduced_events[reduce_ops[0]])
     expected_cols = list(itertools.product(tag_name, reduce_ops))
     n_steps = len(reduced_events[reduce_ops[0]][tag_name[0]])
 
-    assert list(df) == expected_cols, "Unexpected df columns"
-    assert df.shape == (n_steps, len(reduce_ops)), "Unexpected df shape"
+    assert list(df_actual) == expected_cols, "Unexpected df columns"
+    assert df_actual.shape == (n_steps, len(reduce_ops)), "Unexpected df shape"
 
     out_path = tbr.write_data_file(reduced_events, file_path, overwrite=True)
 
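The readers mirror the writer's two-level column layout: CSV and Excel rebuild the (tag, op) MultiIndex from `header=[0, 1]`, while JSON stringifies the column tuples, hence the `ast.literal_eval` pass. A CSV round-trip sketch (toy data, hypothetical path):

```python
import pandas as pd

df_out = pd.DataFrame({("tag_1", "mean"): [0.1], ("tag_1", "max"): [0.3]})
df_out.index.name = "step"
df_out.to_csv("tmp.csv")

df_actual = pd.read_csv("tmp.csv", header=[0, 1], index_col=0)
assert list(df_actual) == [("tag_1", "mean"), ("tag_1", "max")]
```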
