[pre-commit.ci] pre-commit autoupdate (#35)
* [pre-commit.ci] pre-commit autoupdate

updates:
- [github.com/pre-commit/mirrors-mypy: v1.4.0 → v1.4.1](https://github.com/pre-commit/mirrors-mypy/compare/v1.4.0...v1.4.1)

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* update commit hooks

* add __version__ to __init__.py

* rename df variables to be more descriptive

* use typing.NamedTuple over collections.namedtuple

* clean up ruff ignore

* drop https://github.com/nbQA-dev/nbQA, use ruff's beta Jupyter support instead

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Janosh Riebesell <[email protected]>
pre-commit-ci[bot] and janosh authored Aug 16, 2023
1 parent ec3ecba commit 1ace02f
Showing 11 changed files with 67 additions and 64 deletions.
12 changes: 3 additions & 9 deletions .pre-commit-config.yaml
@@ -7,18 +7,18 @@ default_install_hook_types: [pre-commit, commit-msg]
 
 repos:
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.0.275
+    rev: v0.0.284
     hooks:
       - id: ruff
         args: [--fix]
 
   - repo: https://github.com/psf/black
-    rev: 23.3.0
+    rev: 23.7.0
     hooks:
       - id: black-jupyter
 
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.4.0
+    rev: v1.5.0
     hooks:
       - id: mypy
 
@@ -41,12 +41,6 @@ repos:
         stages: [commit, commit-msg]
         exclude_types: [jupyter, bib]
 
-  - repo: https://github.com/nbQA-dev/nbQA
-    rev: 1.7.0
-    hooks:
-      - id: nbqa-ruff
-        args: [--fix]
-
   - repo: https://github.com/kynan/nbstripout
     rev: 0.6.1
     hooks:
2 changes: 1 addition & 1 deletion examples/functorch_mlp_ensemble.ipynb
@@ -522,7 +522,7 @@
 }
 ],
 "source": [
-"for metric in (\"accuracy\", \"loss\"):\n",
+"for metric in (\"accuracy\", \"loss\"):  # noqa: B007\n",
 "    !tb-reducer {writer.log_dir}/training_{metric}* \\\n",
 "        --outpath {writer.log_dir}/training_{metric} \\\n",
 "        --reduce-ops mean,std,min,max \\\n",
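The new `# noqa: B007` suppresses flake8-bugbear's unused-loop-variable rule, which ruff now applies to notebooks directly with nbQA gone. It fires here because `metric` is only referenced inside the `!tb-reducer` shell magic, which ruff cannot see as Python. A toy sketch of what B007 normally flags (hypothetical code, not from this repo):

```python
# B007 fires when the loop variable is never read in the loop body:
for metric in ("accuracy", "loss"):  # ruff would flag `metric` here
    print("reducing...")  # body never uses `metric`
```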
9 changes: 4 additions & 5 deletions examples/wandb_integration.ipynb
@@ -281,8 +281,7 @@
 "        out = self.layer1(x)\n",
 "        out = self.layer2(out)\n",
 "        out = out.reshape(out.size(0), -1)\n",
-"        out = self.fc(out)\n",
-"        return out"
+"        return self.fc(out)"
 ]
 },
 {
@@ -853,13 +852,13 @@
 ],
 "source": [
 "dict_of_df = {k: pd.DataFrame(v) for k, v in reduced_runs.items()}\n",
-"df = pd.concat(dict_of_df, axis=1)\n",
+"df_reduced = pd.concat(dict_of_df, axis=1)\n",
 "\n",
 "plt.rc(\"font\", size=16)\n",
 "\n",
 "_, axs = plt.subplots(1, 2, figsize=(14, 5))\n",
-"df.filter(like=\"loss\").plot(ax=axs[0])\n",
-"df.filter(like=\"accuracy\").plot(ax=axs[1])"
+"df_reduced.filter(like=\"loss\").plot(ax=axs[0])\n",
+"df_reduced.filter(like=\"accuracy\").plot(ax=axs[1])"
 ]
 },
 {
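Renaming `df` to `df_reduced` matches the removal of the `PD901` (pandas-df-variable-name) entry from the ignore list in pyproject.toml below. The plotting pattern itself deserves a self-contained sketch (made-up run data, hypothetical values):

```python
import pandas as pd

# Concatenating a dict of DataFrames along axis=1 produces MultiIndex
# columns keyed by the dict keys, so .filter(like="loss") selects
# every column whose label mentions "loss".
reduced_runs = {
    "mean": {"loss": [0.9, 0.5], "accuracy": [0.4, 0.7]},
    "max": {"loss": [1.1, 0.6], "accuracy": [0.5, 0.8]},
}
dict_of_df = {k: pd.DataFrame(v) for k, v in reduced_runs.items()}
df_reduced = pd.concat(dict_of_df, axis=1)
print(df_reduced.filter(like="loss"))
```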
20 changes: 8 additions & 12 deletions pyproject.toml
@@ -64,6 +64,7 @@ warn_unused_ignores = true
 
 [tool.ruff]
 target-version = "py38"
+include = ["**/pyproject.toml", "*.ipynb", "*.py", "*.pyi"]
 select = [
     "B", # flake8-bugbear
     "C4", # flake8-comprehensions
@@ -95,19 +96,14 @@ select = [
     "YTT", # flake8-2020
 ]
 ignore = [
-    "D100", # Missing docstring in public module
-    "D104", # Missing docstring in public package
-    "D205", # 1 blank line required between summary line and description
-    "N802", # invalid-function-name
-    "N806", # non-lowercase-variable-in-function
-    "PD901", # pandas-df-variable-name
-    "PLC0414", # useless-import-alias
-    "PLR", # pylint refactor
-    "PT006", # pytest-parametrize-names-wrong-type
-    "PT013", # pytest-incorrect-pytest-import
+    "D100", # Missing docstring in public module
+    "D205", # 1 blank line required between summary line and description
+    "PLR", # pylint refactor
+    "PT006", # pytest-parametrize-names-wrong-type
 ]
 pydocstyle.convention = "google"
 
 [tool.ruff.per-file-ignores]
-"tests/*" = ["D103"]
-"examples/*" = ["D102", "D103", "D107", "E402"]
+"tests/*" = ["D103", "D104"]
+"__init__.py" = ["F401"]
+"examples/*" = ["D102", "D103", "D107", "E402", "FA102"]
22 changes: 16 additions & 6 deletions tensorboard_reducer/__init__.py
@@ -1,6 +1,16 @@
-from tensorboard_reducer.load import load_tb_events as load_tb_events
-from tensorboard_reducer.main import main as main
-from tensorboard_reducer.reduce import reduce_events as reduce_events
-from tensorboard_reducer.write import write_data_file as write_data_file
-from tensorboard_reducer.write import write_df as write_df
-from tensorboard_reducer.write import write_tb_events as write_tb_events
+"""TensorBoard Reducer package.
+
+Author: Janosh Riebesell (2021-04-04)
+"""
+
+from importlib.metadata import PackageNotFoundError, version
+
+from tensorboard_reducer.load import load_tb_events
+from tensorboard_reducer.main import main
+from tensorboard_reducer.reduce import reduce_events
+from tensorboard_reducer.write import write_data_file, write_df, write_tb_events
+
+try:  # noqa: SIM105
+    __version__ = version("tensorboard-reducer")
+except PackageNotFoundError:
+    pass  # package not installed
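With `__version__` wired to package metadata, the version is available at runtime without a hard-coded string; the `try/except` keeps imports working from an uninstalled source checkout. Expected usage, as a sketch:

```python
import tensorboard_reducer as tbr

# Reads the version of the installed tensorboard-reducer distribution
# via importlib.metadata (only set when the package is installed).
print(tbr.__version__)
```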
11 changes: 8 additions & 3 deletions tensorboard_reducer/event_loader.py
@@ -1,8 +1,7 @@
 from __future__ import annotations
 
 import threading
-from collections import namedtuple
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, NamedTuple
 
 from tensorboard.backend.event_processing import (
     directory_watcher,
@@ -14,7 +13,13 @@
 if TYPE_CHECKING:
     from tensorboard.compat.proto.event_pb2 import Event
 
-ScalarEvent = namedtuple("ScalarEvent", ["wall_time", "step", "value"])
+
+class ScalarEvent(NamedTuple):
+    """A logged scalar value."""
+
+    wall_time: float
+    step: int
+    value: float
 
 
 class EventAccumulator:
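`typing.NamedTuple` keeps the runtime behavior of the old `collections.namedtuple` while adding field types that mypy (also bumped by this commit) can check. A quick sketch with made-up values:

```python
# Construction, attribute access, and tuple unpacking work as before:
event = ScalarEvent(wall_time=1692144000.0, step=10, value=0.25)
assert event.step == 10
wall_time, step, value = event  # still a plain tuple underneath
```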
24 changes: 12 additions & 12 deletions tensorboard_reducer/load.py
@@ -103,10 +103,10 @@ def load_tb_events(
 
     for tag in accumulator.scalar_tags:
         # accumulator.Scalars() returns columns 'step', 'wall_time', 'value'
-        df = pd.DataFrame(accumulator.Scalars(tag)).set_index("step")
-        df = df.drop(columns="wall_time")
+        df_scalar = pd.DataFrame(accumulator.Scalars(tag)).set_index("step")
+        df_scalar = df_scalar.drop(columns="wall_time")
 
-        if handle_dup_steps is None and not df.index.is_unique:
+        if handle_dup_steps is None and not df_scalar.index.is_unique:
             raise ValueError(
                 f"Tag '{tag}' from run directory '{in_dir}' contains duplicate "
                 "steps. Please make sure your data wasn't corrupted. If this is "
@@ -118,12 +118,12 @@
                 "or take their mean."
             )
         if handle_dup_steps == "mean":
-            df = df.groupby(df.index).mean()
+            df_scalar = df_scalar.groupby(df_scalar.index).mean()
         elif handle_dup_steps in ("keep-first", "keep-last"):
             keep = handle_dup_steps.replace("keep-", "")
-            df = df[~df.index.duplicated(keep=keep)]
+            df_scalar = df_scalar[~df_scalar.index.duplicated(keep=keep)]
 
-        load_dict[tag].append(df)
+        load_dict[tag].append(df_scalar)
 
     # Safety check: make sure all loaded runs have equal numbers of steps for each tag
     # unless user set strict_steps=False.
@@ -162,10 +162,10 @@
         # That is, we retain all steps as long as any run recorded a value for it.
         # Only makes a difference if strict_steps=False and different runs have
         # non-overlapping steps.
-        df = pd.concat(lst, join="outer", axis=1)
+        df_scalar = pd.concat(lst, join="outer", axis=1)
         # count(axis=1) returns the number of non-NaN values in each row
-        df = df[df.count(axis=1) >= min_runs_per_step]
-        out_dict[key] = df
+        df_scalar = df_scalar[df_scalar.count(axis=1) >= min_runs_per_step]
+        out_dict[key] = df_scalar
 
     else:
         # join='inner' means keep only the intersection of indices from all joined
@@ -179,7 +179,7 @@
     if verbose:
         n_tags = len(out_dict)
         if strict_steps and strict_tags:
-            n_steps, n_events = list(out_dict.values())[0].shape
+            n_steps, n_events = next(iter(out_dict.values())).shape
             print(
                 f"Loaded {n_events} TensorBoard runs with {n_tags} scalars "
                 f"and {n_steps} steps each"
@@ -190,8 +190,8 @@
             )
 
         for tag in list(out_dict)[:50]:
-            df = out_dict[tag]
-            print(f"- '{tag}': {df.shape}")
+            df_scalar = out_dict[tag]
+            print(f"- '{tag}': {df_scalar.shape}")
         if len(out_dict) > 50:
             print("...")
 
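The `df` → `df_scalar` rename is mechanical, but the duplicate-step logic it touches is easy to illustrate. A toy sketch of the three `handle_dup_steps` strategies on an index with a repeated step (illustrative data only):

```python
import pandas as pd

df_scalar = pd.DataFrame({"value": [1.0, 3.0, 5.0]}, index=[0, 0, 1])

# "mean": average duplicate steps -> step 0 becomes 2.0
print(df_scalar.groupby(df_scalar.index).mean())

# "keep-first" / "keep-last": one row per step, picked by position
print(df_scalar[~df_scalar.index.duplicated(keep="first")])  # step 0 -> 1.0
print(df_scalar[~df_scalar.index.duplicated(keep="last")])   # step 0 -> 3.0
```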
12 changes: 6 additions & 6 deletions tensorboard_reducer/write.py
@@ -173,18 +173,18 @@ def write_data_file(
     # create multi-index dataframe from event data with reduce op names as 1st-level col
     # names and tag names as 2nd level
     dict_of_dfs = {op: pd.DataFrame(dic) for op, dic in data_to_write.items()}
-    df = pd.concat(dict_of_dfs, axis=1)
-    df.columns = df.columns.swaplevel(0, 1)
-    df.index.name = "step"
+    df_out = pd.concat(dict_of_dfs, axis=1)
+    df_out.columns = df_out.columns.swaplevel(0, 1)
+    df_out.index.name = "step"
 
     # let pandas handle compression inference from extensions (.csv.gz, .json.bz2, etc.)
     basename = os.path.basename(out_path)
     if ".csv" in basename.lower():
-        df.to_csv(out_path)
+        df_out.to_csv(out_path)
     elif ".json" in basename.lower():
-        df.to_json(out_path)
+        df_out.to_json(out_path)
     elif ".xlsx" in out_path.lower():
-        df.to_excel(out_path)
+        df_out.to_excel(out_path)
     else:
         raise ValueError(
             f"{out_path=} has unknown extension, should be one of {_known_extensions} "
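The dispatch above only picks the writer method; pandas itself infers any compression codec from the final suffix, which is why `.csv.gz` and friends need no extra handling. A sketch with hypothetical file names:

```python
import pandas as pd

df_out = pd.DataFrame({"mean": [0.1, 0.2]}, index=pd.Index([0, 1], name="step"))
df_out.to_csv("reduced.csv")     # plain CSV
df_out.to_csv("reduced.csv.gz")  # gzip-compressed, codec inferred from .gz
```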
3 changes: 1 addition & 2 deletions tests/test_main.py
@@ -5,7 +5,6 @@
 from typing import TYPE_CHECKING
 
 import pytest
-from pytest import CaptureFixture
 
 from tensorboard_reducer import main
 
@@ -81,7 +80,7 @@ def test_main_lax_csv_output(tmp_path: Path) -> None:
 
 
 @pytest.mark.parametrize("arg", ["-v", "--version"])
-def test_main_report_version(capsys: CaptureFixture[str], arg: str) -> None:
+def test_main_report_version(capsys: pytest.CaptureFixture[str], arg: str) -> None:
     """Test CLI version flag."""
     with pytest.raises(SystemExit) as exc_info:
         main([arg])
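Accessing `CaptureFixture` through the `pytest` namespace satisfies `PT013` (pytest-incorrect-pytest-import), which this commit removes from the ruff ignore list. Minimal usage sketch (hypothetical test):

```python
import pytest


def test_greets(capsys: pytest.CaptureFixture[str]) -> None:
    print("hello")
    stdout, _stderr = capsys.readouterr()
    assert stdout == "hello\n"
```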
4 changes: 2 additions & 2 deletions tests/test_reduce.py
@@ -15,8 +15,8 @@ def generate_sample_data(
     events_dict = {}
     for idx in range(n_tags):
         data = np.random.random((n_steps, n_runs))
-        df = pd.DataFrame(data, columns=[f"run_{j}" for j in range(n_runs)])
-        events_dict[f"tag_{idx}"] = df
+        df_rand = pd.DataFrame(data, columns=[f"run_{j}" for j in range(n_runs)])
+        events_dict[f"tag_{idx}"] = df_rand
     return events_dict
 
 
12 changes: 6 additions & 6 deletions tests/test_write.py
@@ -73,20 +73,20 @@ def test_write_data_file(
     tbr.write_data_file(reduced_events, file_path, verbose=verbose)
 
     if ".csv" in extension:
-        df = pd.read_csv(file_path, header=[0, 1], index_col=0)
+        df_actual = pd.read_csv(file_path, header=[0, 1], index_col=0)
     elif ".json" in extension:
-        df = pd.read_json(file_path)
-        df.columns = map(ast.literal_eval, df.columns)
+        df_actual = pd.read_json(file_path)
+        df_actual.columns = map(ast.literal_eval, df_actual.columns)
     elif ".xlsx" in extension:
-        df = pd.read_excel(file_path, header=[0, 1], index_col=0)
+        df_actual = pd.read_excel(file_path, header=[0, 1], index_col=0)
 
     reduce_ops = list(reduced_events)
     tag_name = list(reduced_events[reduce_ops[0]])
     expected_cols = list(itertools.product(tag_name, reduce_ops))
     n_steps = len(reduced_events[reduce_ops[0]][tag_name[0]])
 
-    assert list(df) == expected_cols, "Unexpected df columns"
-    assert df.shape == (n_steps, len(reduce_ops)), "Unexpected df shape"
+    assert list(df_actual) == expected_cols, "Unexpected df columns"
+    assert df_actual.shape == (n_steps, len(reduce_ops)), "Unexpected df shape"
 
     out_path = tbr.write_data_file(reduced_events, file_path, overwrite=True)
 
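The readers mirror the writer's two-level column layout: CSV and Excel rebuild the (tag, op) MultiIndex from `header=[0, 1]`, while JSON stringifies the column tuples, hence the `ast.literal_eval` pass. A CSV round-trip sketch (toy data, hypothetical path):

```python
import pandas as pd

df_out = pd.DataFrame({("tag_1", "mean"): [0.1], ("tag_1", "max"): [0.3]})
df_out.index.name = "step"
df_out.to_csv("tmp.csv")

df_actual = pd.read_csv("tmp.csv", header=[0, 1], index_col=0)
assert list(df_actual) == [("tag_1", "mean"), ("tag_1", "max")]
```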
