Skip to content

Commit

Permalink
Fix lint dependency checker (#137)
Browse files (browse the repository at this point in the history)
  • Loading branch information
R-Palazzo authored Oct 29, 2024
1 parent c4ae194 commit 67990b9
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 43 deletions.
1 change: 1 addition & 0 deletions .github/workflows/dependency_checker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ jobs:
run: |
python -m pip install .[dev]
make check-deps OUTPUT_FILEPATH=latest_requirements.txt
make fix-lint
- name: Create pull request
id: cpr
uses: peter-evans/create-pull-request@v4
Expand Down
10 changes: 5 additions & 5 deletions deepecho/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@ def fit_sequences(self, sequences, context_types, data_types):
For example, a sequence might look something like::
{
"context": [1],
"data": [
'context': [1],
'data': [
[1, 3, 4, 5, 11, 3, 4],
[2, 2, 3, 4, 5, 1, 2],
[1, 3, 4, 5, 2, 3, 1]
]
[2, 2, 3, 4, 5, 1, 2],
[1, 3, 4, 5, 2, 3, 1],
],
}
The "context" attribute maps to a list of variables which
Expand Down
12 changes: 6 additions & 6 deletions deepecho/models/basic_gan.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def _analyze_data(self, sequences, context_types, data_types):
def _normalize(tensor, value, properties):
"""Normalize the value between 0 and 1 and flag nans."""
value_idx, missing_idx = properties['indices']
if pd.isnull(value):
if pd.isna(value):
tensor[value_idx] = 0.0
tensor[missing_idx] = 1.0
else:
Expand Down Expand Up @@ -493,12 +493,12 @@ def fit_sequences(self, sequences, context_types, data_types):
For example, a sequence might look something like::
{
"context": [1],
"data": [
'context': [1],
'data': [
[1, 3, 4, 5, 11, 3, 4],
[2, 2, 3, 4, 5, 1, 2],
[1, 3, 4, 5, 2, 3, 1]
]
[2, 2, 3, 4, 5, 1, 2],
[1, 3, 4, 5, 2, 3, 1],
],
}
The "context" attribute maps to a list of variables which
Expand Down
44 changes: 20 additions & 24 deletions deepecho/models/par.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def _idx_map(self, x, t):
'type': t,
'mu': np.nanmean(x[i]),
'std': np.nanstd(x[i]),
'nulls': pd.isnull(x[i]).any(),
'nulls': pd.isna(x[i]).any(),
'indices': (idx, idx + 1, idx + 2),
}
idx += 3
Expand All @@ -141,7 +141,7 @@ def _idx_map(self, x, t):
'type': t,
'min': np.nanmin(x[i]),
'range': np.nanmax(x[i]) - np.nanmin(x[i]),
'nulls': pd.isnull(x[i]).any(),
'nulls': pd.isna(x[i]).any(),
'indices': (idx, idx + 1, idx + 2),
}
idx += 3
Expand All @@ -150,7 +150,7 @@ def _idx_map(self, x, t):
idx_map[i] = {'type': t, 'indices': {}}
idx += 1
for v in set(x[i]):
if pd.isnull(v):
if pd.isna(v):
v = None

idx_map[i]['indices'][v] = idx
Expand Down Expand Up @@ -210,30 +210,30 @@ def _data_to_tensor(self, data):

elif props['type'] in ['continuous', 'timestamp']:
mu_idx, sigma_idx, missing_idx = props['indices']
if pd.isnull(data[key][i]) or props['std'] == 0:
if pd.isna(data[key][i]) or props['std'] == 0:
x[mu_idx] = 0.0
else:
x[mu_idx] = (data[key][i] - props['mu']) / props['std']

x[sigma_idx] = 0.0
x[missing_idx] = 1.0 if pd.isnull(data[key][i]) else 0.0
x[missing_idx] = 1.0 if pd.isna(data[key][i]) else 0.0

elif props['type'] in ['count']:
r_idx, p_idx, missing_idx = props['indices']
if pd.isnull(data[key][i]) or props['range'] == 0:
if pd.isna(data[key][i]) or props['range'] == 0:
x[r_idx] = 0.0
else:
x[r_idx] = (data[key][i] - props['min']) / props['range']

x[p_idx] = 0.0
x[missing_idx] = 1.0 if pd.isnull(data[key][i]) else 0.0
x[missing_idx] = 1.0 if pd.isna(data[key][i]) else 0.0

elif props['type'] in [
'categorical',
'ordinal',
]: # categorical
value = data[key][i]
if pd.isnull(value):
if pd.isna(value):
value = None
x[props['indices'][value]] = 1.0

Expand All @@ -258,25 +258,25 @@ def _context_to_tensor(self, context):
mu_idx, sigma_idx, missing_idx = props['indices']
x[mu_idx] = (
0.0
if (pd.isnull(context[key]) or props['std'] == 0)
if (pd.isna(context[key]) or props['std'] == 0)
else (context[key] - props['mu']) / props['std']
)
x[sigma_idx] = 0.0
x[missing_idx] = 1.0 if pd.isnull(context[key]) else 0.0
x[missing_idx] = 1.0 if pd.isna(context[key]) else 0.0

elif props['type'] in ['count']:
r_idx, p_idx, missing_idx = props['indices']
x[r_idx] = (
0.0
if (pd.isnull(context[key]) or props['range'] == 0)
if (pd.isna(context[key]) or props['range'] == 0)
else (context[key] - props['min']) / props['range']
)
x[p_idx] = 0.0
x[missing_idx] = 1.0 if pd.isnull(context[key]) else 0.0
x[missing_idx] = 1.0 if pd.isna(context[key]) else 0.0

elif props['type'] in ['categorical', 'ordinal']:
value = context[key]
if pd.isnull(value):
if pd.isna(value):
value = None
x[props['indices'][value]] = 1.0

Expand All @@ -295,12 +295,12 @@ def fit_sequences(self, sequences, context_types, data_types):
For example, a sequence might look something like::
{
"context": [1],
"data": [
'context': [1],
'data': [
[1, 3, 4, 5, 11, 3, 4],
[2, 2, 3, 4, 5, 1, 2],
[1, 3, 4, 5, 2, 3, 1]
]
[2, 2, 3, 4, 5, 1, 2],
[1, 3, 4, 5, 2, 3, 1],
],
}
The "context" attribute maps to a list of variables which
Expand Down Expand Up @@ -406,9 +406,7 @@ def _compute_loss(self, X_padded, Y_padded, seq_len):
p_true = X_padded[: seq_len[i], i, missing_idx]
p_pred = missing[: seq_len[i], i]
log_likelihood += torch.sum(p_true * p_pred)
log_likelihood += torch.sum(
(1.0 - p_true) * torch.log(1.0 - torch.exp(p_pred))
)
log_likelihood += torch.sum((1.0 - p_true) * torch.log(1.0 - torch.exp(p_pred)))

elif props['type'] in ['count']:
r_idx, p_idx, missing_idx = props['indices']
Expand All @@ -428,9 +426,7 @@ def _compute_loss(self, X_padded, Y_padded, seq_len):
p_true = X_padded[: seq_len[i], i, missing_idx]
p_pred = missing[: seq_len[i], i]
log_likelihood += torch.sum(p_true * p_pred)
log_likelihood += torch.sum(
(1.0 - p_true) * torch.log(1.0 - torch.exp(p_pred))
)
log_likelihood += torch.sum((1.0 - p_true) * torch.log(1.0 - torch.exp(p_pred)))

elif props['type'] in ['categorical', 'ordinal']:
idx = list(props['indices'].values())
Expand Down
6 changes: 3 additions & 3 deletions deepecho/sequences.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def segment_by_time(sequence, segment_size, sequence_index):
while start <= max_time:
end = start + segment_size
selected = (start <= sequence_index) & (sequence_index < end)
sequences.append(sequence[selected.values].reset_index(drop=True))
sequences.append(sequence[selected.to_numpy()].reset_index(drop=True))
start = end

return sequences
Expand Down Expand Up @@ -112,7 +112,7 @@ def _convert_to_dicts(segments, context_columns):
if len(context.drop_duplicates()) > 1:
raise ValueError('Context columns are not constant within each segment.')

context = context.iloc[0].values
context = context.iloc[0].to_numpy()
segment = segment.drop(context_columns, axis=1)
else:
context = []
Expand Down Expand Up @@ -180,7 +180,7 @@ def assemble_sequences(
segments = []
groupby_columns = entity_columns[0] if len(entity_columns) == 1 else entity_columns
for _, sequence in data.groupby(groupby_columns):
sequence.drop(entity_columns, axis=1, inplace=True)
sequence = sequence.drop(entity_columns, axis=1)
if context_columns:
if len(sequence[context_columns].drop_duplicates()) > 1:
raise ValueError('Context columns are not constant within each entity.')
Expand Down
30 changes: 25 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ dev = [
'watchdog>=1.0.1,<5',

# style check
'ruff>=0.3.2,<0.7.2',
'ruff>=0.3.2,<1',

# distribute on PyPI
'twine>=1.10.0,<4',
Expand Down Expand Up @@ -160,15 +160,18 @@ build-backend = 'setuptools.build_meta'

[tool.ruff]
preview = true
line-length = 99
line-length = 100
indent-width = 4
src = ["deepecho"]
target-version = "py312"
exclude = [
"docs",
".tox",
".git",
"__pycache__",
".ipynb_checkpoints"
"*.ipynb",
".ipynb_checkpoints",
"tasks.py",
]

[tool.ruff.lint]
Expand All @@ -178,20 +181,31 @@ select = [
# Pycodestyle
"E",
"W",
# pydocstyle
"D",
# isort
"I001"
"I001",
# print statements
"T201",
# pandas-vet
"PD",
# numpy 2.0
"NPY201"
]
ignore = [
"E501",
# pydocstyle
"D107", # Missing docstring in __init__
"D417", # Missing argument descriptions in the docstring; this is a known pydocstyle bug: https://github.com/PyCQA/pydocstyle/issues/449
"PD901",
"PD101",
]

[tool.ruff.lint.pep8-naming]
extend-ignore-names = ["X", "C", "X_padded", "Y", "Y_padded"]

[tool.ruff.lint.isort]
known-first-party = ["deepecho"]
lines-between-types = 0

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401", "E402", "F403", "F405", "E501", "I001"]
Expand All @@ -200,6 +214,12 @@ known-first-party = ["deepecho"]
quote-style = "single"
indent-style = "space"
preview = true
docstring-code-format = true
docstring-code-line-length = "dynamic"

[tool.ruff.lint.pydocstyle]
convention = "google"

[tool.ruff.lint.pycodestyle]
max-doc-length = 100
max-line-length = 100

0 comments on commit 67990b9

Please sign in to comment.