From 67990b91a92ad3d5c58cfb7174ffe3b577923920 Mon Sep 17 00:00:00 2001 From: R-Palazzo <116157184+R-Palazzo@users.noreply.github.com> Date: Tue, 29 Oct 2024 12:22:33 -0400 Subject: [PATCH] Fix lint dependency checker (#137) --- .github/workflows/dependency_checker.yml | 1 + deepecho/models/base.py | 10 +++--- deepecho/models/basic_gan.py | 12 +++---- deepecho/models/par.py | 44 +++++++++++------------- deepecho/sequences.py | 6 ++-- pyproject.toml | 30 +++++++++++++--- 6 files changed, 60 insertions(+), 43 deletions(-) diff --git a/.github/workflows/dependency_checker.yml b/.github/workflows/dependency_checker.yml index 7900502..589ce20 100644 --- a/.github/workflows/dependency_checker.yml +++ b/.github/workflows/dependency_checker.yml @@ -16,6 +16,7 @@ jobs: run: | python -m pip install .[dev] make check-deps OUTPUT_FILEPATH=latest_requirements.txt + make fix-lint - name: Create pull request id: cpr uses: peter-evans/create-pull-request@v4 diff --git a/deepecho/models/base.py b/deepecho/models/base.py index 545a3e7..2c30243 100644 --- a/deepecho/models/base.py +++ b/deepecho/models/base.py @@ -54,12 +54,12 @@ def fit_sequences(self, sequences, context_types, data_types): For example, a sequence might look something like:: { - "context": [1], - "data": [ + 'context': [1], + 'data': [ [1, 3, 4, 5, 11, 3, 4], - [2, 2, 3, 4, 5, 1, 2], - [1, 3, 4, 5, 2, 3, 1] - ] + [2, 2, 3, 4, 5, 1, 2], + [1, 3, 4, 5, 2, 3, 1], + ], } The "context" attribute maps to a list of variables which diff --git a/deepecho/models/basic_gan.py b/deepecho/models/basic_gan.py index c6beadb..4cdeb6e 100644 --- a/deepecho/models/basic_gan.py +++ b/deepecho/models/basic_gan.py @@ -271,7 +271,7 @@ def _analyze_data(self, sequences, context_types, data_types): def _normalize(tensor, value, properties): """Normalize the value between 0 and 1 and flag nans.""" value_idx, missing_idx = properties['indices'] - if pd.isnull(value): + if pd.isna(value): tensor[value_idx] = 0.0 tensor[missing_idx] = 1.0 else: @@ -493,12 +493,12 @@ def fit_sequences(self, sequences, context_types, data_types): For example, a sequence might look something like:: { - "context": [1], - "data": [ + 'context': [1], + 'data': [ [1, 3, 4, 5, 11, 3, 4], - [2, 2, 3, 4, 5, 1, 2], - [1, 3, 4, 5, 2, 3, 1] - ] + [2, 2, 3, 4, 5, 1, 2], + [1, 3, 4, 5, 2, 3, 1], + ], } The "context" attribute maps to a list of variables which diff --git a/deepecho/models/par.py b/deepecho/models/par.py index 57e2a68..46996a4 100644 --- a/deepecho/models/par.py +++ b/deepecho/models/par.py @@ -131,7 +131,7 @@ def _idx_map(self, x, t): 'type': t, 'mu': np.nanmean(x[i]), 'std': np.nanstd(x[i]), - 'nulls': pd.isnull(x[i]).any(), + 'nulls': pd.isna(x[i]).any(), 'indices': (idx, idx + 1, idx + 2), } idx += 3 @@ -141,7 +141,7 @@ def _idx_map(self, x, t): 'type': t, 'min': np.nanmin(x[i]), 'range': np.nanmax(x[i]) - np.nanmin(x[i]), - 'nulls': pd.isnull(x[i]).any(), + 'nulls': pd.isna(x[i]).any(), 'indices': (idx, idx + 1, idx + 2), } idx += 3 @@ -150,7 +150,7 @@ def _idx_map(self, x, t): idx_map[i] = {'type': t, 'indices': {}} idx += 1 for v in set(x[i]): - if pd.isnull(v): + if pd.isna(v): v = None idx_map[i]['indices'][v] = idx @@ -210,30 +210,30 @@ def _data_to_tensor(self, data): elif props['type'] in ['continuous', 'timestamp']: mu_idx, sigma_idx, missing_idx = props['indices'] - if pd.isnull(data[key][i]) or props['std'] == 0: + if pd.isna(data[key][i]) or props['std'] == 0: x[mu_idx] = 0.0 else: x[mu_idx] = (data[key][i] - props['mu']) / props['std'] x[sigma_idx] = 0.0 - x[missing_idx] = 1.0 if pd.isnull(data[key][i]) else 0.0 + x[missing_idx] = 1.0 if pd.isna(data[key][i]) else 0.0 elif props['type'] in ['count']: r_idx, p_idx, missing_idx = props['indices'] - if pd.isnull(data[key][i]) or props['range'] == 0: + if pd.isna(data[key][i]) or props['range'] == 0: x[r_idx] = 0.0 else: x[r_idx] = (data[key][i] - props['min']) / props['range'] x[p_idx] = 0.0 - x[missing_idx] = 1.0 if pd.isnull(data[key][i]) else 0.0 + x[missing_idx] = 1.0 if pd.isna(data[key][i]) else 0.0 elif props['type'] in [ 'categorical', 'ordinal', ]: # categorical value = data[key][i] - if pd.isnull(value): + if pd.isna(value): value = None x[props['indices'][value]] = 1.0 @@ -258,25 +258,25 @@ def _context_to_tensor(self, context): mu_idx, sigma_idx, missing_idx = props['indices'] x[mu_idx] = ( 0.0 - if (pd.isnull(context[key]) or props['std'] == 0) + if (pd.isna(context[key]) or props['std'] == 0) else (context[key] - props['mu']) / props['std'] ) x[sigma_idx] = 0.0 - x[missing_idx] = 1.0 if pd.isnull(context[key]) else 0.0 + x[missing_idx] = 1.0 if pd.isna(context[key]) else 0.0 elif props['type'] in ['count']: r_idx, p_idx, missing_idx = props['indices'] x[r_idx] = ( 0.0 - if (pd.isnull(context[key]) or props['range'] == 0) + if (pd.isna(context[key]) or props['range'] == 0) else (context[key] - props['min']) / props['range'] ) x[p_idx] = 0.0 - x[missing_idx] = 1.0 if pd.isnull(context[key]) else 0.0 + x[missing_idx] = 1.0 if pd.isna(context[key]) else 0.0 elif props['type'] in ['categorical', 'ordinal']: value = context[key] - if pd.isnull(value): + if pd.isna(value): value = None x[props['indices'][value]] = 1.0 @@ -295,12 +295,12 @@ def fit_sequences(self, sequences, context_types, data_types): For example, a sequence might look something like:: { - "context": [1], - "data": [ + 'context': [1], + 'data': [ [1, 3, 4, 5, 11, 3, 4], - [2, 2, 3, 4, 5, 1, 2], - [1, 3, 4, 5, 2, 3, 1] - ] + [2, 2, 3, 4, 5, 1, 2], + [1, 3, 4, 5, 2, 3, 1], + ], } The "context" attribute maps to a list of variables which @@ -406,9 +406,7 @@ def _compute_loss(self, X_padded, Y_padded, seq_len): p_true = X_padded[: seq_len[i], i, missing_idx] p_pred = missing[: seq_len[i], i] log_likelihood += torch.sum(p_true * p_pred) - log_likelihood += torch.sum( - (1.0 - p_true) * torch.log(1.0 - torch.exp(p_pred)) - ) + log_likelihood += torch.sum((1.0 - p_true) * torch.log(1.0 - torch.exp(p_pred))) elif props['type'] in ['count']: r_idx, p_idx, missing_idx = props['indices'] @@ -428,9 +426,7 @@ def _compute_loss(self, X_padded, Y_padded, seq_len): p_true = X_padded[: seq_len[i], i, missing_idx] p_pred = missing[: seq_len[i], i] log_likelihood += torch.sum(p_true * p_pred) - log_likelihood += torch.sum( - (1.0 - p_true) * torch.log(1.0 - torch.exp(p_pred)) - ) + log_likelihood += torch.sum((1.0 - p_true) * torch.log(1.0 - torch.exp(p_pred))) elif props['type'] in ['categorical', 'ordinal']: idx = list(props['indices'].values()) diff --git a/deepecho/sequences.py b/deepecho/sequences.py index 847c511..8f0a11f 100644 --- a/deepecho/sequences.py +++ b/deepecho/sequences.py @@ -59,7 +59,7 @@ def segment_by_time(sequence, segment_size, sequence_index): while start <= max_time: end = start + segment_size selected = (start <= sequence_index) & (sequence_index < end) - sequences.append(sequence[selected.values].reset_index(drop=True)) + sequences.append(sequence[selected.to_numpy()].reset_index(drop=True)) start = end return sequences @@ -112,7 +112,7 @@ def _convert_to_dicts(segments, context_columns): if len(context.drop_duplicates()) > 1: raise ValueError('Context columns are not constant within each segment.') - context = context.iloc[0].values + context = context.iloc[0].to_numpy() segment = segment.drop(context_columns, axis=1) else: context = [] @@ -180,7 +180,7 @@ def assemble_sequences( segments = [] groupby_columns = entity_columns[0] if len(entity_columns) == 1 else entity_columns for _, sequence in data.groupby(groupby_columns): - sequence.drop(entity_columns, axis=1, inplace=True) + sequence = sequence.drop(entity_columns, axis=1) if context_columns: if len(sequence[context_columns].drop_duplicates()) > 1: raise ValueError('Context columns are not constant within each entity.') diff --git a/pyproject.toml b/pyproject.toml index 2152037..52a9ae5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,7 +61,7 @@ dev = [ 'watchdog>=1.0.1,<5', # style check - 'ruff>=0.3.2,<0.7.2', + 'ruff>=0.3.2,<1', # distribute on PyPI 'twine>=1.10.0,<4', @@ -160,7 +160,8 @@ build-backend = 'setuptools.build_meta' [tool.ruff] preview = true -line-length = 99 +line-length = 100 +indent-width = 4 src = ["deepecho"] target-version = "py312" exclude = [ @@ -168,7 +169,9 @@ exclude = [ ".tox", ".git", "__pycache__", - ".ipynb_checkpoints" + "*.ipynb", + ".ipynb_checkpoints", + "tasks.py", ] [tool.ruff.lint] @@ -178,13 +181,23 @@ select = [ # Pycodestyle "E", "W", + # pydocstyle + "D", # isort - "I001" + "I001", + # print statements + "T201", + # pandas-vet + "PD", + # numpy 2.0 + "NPY201" ] ignore = [ - "E501", + # pydocstyle "D107", # Missing docstring in __init__ "D417", # Missing argument descriptions in the docstring, this is a bug from pydocstyle: https://github.com/PyCQA/pydocstyle/issues/449 + "PD901", + "PD101", ] [tool.ruff.lint.pep8-naming] @@ -192,6 +205,7 @@ extend-ignore-names = ["X", "C", "X_padded", "Y", "Y_padded"] [tool.ruff.lint.isort] known-first-party = ["deepecho"] +lines-between-types = 0 [tool.ruff.lint.per-file-ignores] "__init__.py" = ["F401", "E402", "F403", "F405", "E501", "I001"] @@ -200,6 +214,12 @@ known-first-party = ["deepecho"] quote-style = "single" indent-style = "space" preview = true +docstring-code-format = true +docstring-code-line-length = "dynamic" [tool.ruff.lint.pydocstyle] convention = "google" + +[tool.ruff.lint.pycodestyle] +max-doc-length = 100 +max-line-length = 100