update linter
d-chambers committed Aug 17, 2024
1 parent 7f06c6c commit 357ec8e
Showing 6 changed files with 130 additions and 57 deletions.
45 changes: 17 additions & 28 deletions .pre-commit-config.yaml
@@ -1,38 +1,27 @@
 exclude: scripts/
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
   rev: v2.3.0
   hooks:
     - id: check-yaml
     - id: end-of-file-fixer
     - id: trailing-whitespace
     - id: check-merge-conflict
     - id: mixed-line-ending
       args: ['--fix=lf']
-- repo: https://github.com/psf/black
-  rev: 22.6.0
+
+# Ruff is a replacement for flake8 and many other linters (much faster too)
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  # Ruff version.
+  rev: v0.6.1
   hooks:
-    - id: black
-- repo: https://github.com/PyCQA/flake8
-  rev: 3.8.3
+    - id: ruff
+      args: ["--fix"]
+    # Run the formatter.
+    - id: ruff-format
+
+# ensures __future__ import annotations at top of files which require it
+# for the typing features they are using.
+- repo: https://github.com/frostming/fix-future-annotations
+  rev: 0.5.0
   hooks:
-    - id: flake8
-      additional_dependencies:
-        - flake8-black
-        - flake8-breakpoint
-        - flake8-docstrings
-- repo: https://github.com/pycqa/isort
-  rev: 5.12.0
-  hooks:
-    - id: isort
-      name: isort (python)
-      args: ["--profile", "black"]
-    - id: isort
-      name: isort (cython)
-      types: [cython]
-    - id: isort
-      name: isort (pyi)
-      types: [pyi]
-- repo: https://github.com/kynan/nbstripout
-  rev: 0.3.9
-  hooks:
-    - id: nbstripout
-      files: ".ipynb"
+    - id: fix-future-annotations
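
The new setup folds linting (ruff with --fix), formatting (ruff-format), and import sorting (the I001 rule selected in pyproject.toml) into a single tool, with fix-future-annotations handling the __future__ import. A minimal sketch, not taken from the repo, of the style these hooks converge on; the nearest_center function and its parameters are hypothetical:

from __future__ import annotations

import numpy as np


def nearest_center(x: np.ndarray, centers: np.ndarray | None = None) -> np.ndarray:
    """Return the index of the nearest center for each 1D point."""
    if centers is None:
        centers = np.array([0.0])
    # Broadcast |x - c| over every (point, center) pair, then argmin per point.
    return np.abs(x[:, None] - centers[None, :]).argmin(axis=1)


print(nearest_center(np.array([0.2, 4.8, 9.9]), np.array([0.0, 5.0, 10.0])))  # [0 1 2]

With the hooks installed (pre-commit install), a single pre-commit run --all-files applies the lint fixes, the formatter, and the __future__ import insertion in one pass.
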
73 changes: 73 additions & 0 deletions pyproject.toml
@@ -64,3 +64,76 @@ dev = ["dbscan1d[test]"]
 "Bug Tracker" = "https://github.com/d-chambers/dbscan1d/issues"
 "Documentation" = "https://github.com/d-chambers/dbscan1d"
 "Homepage" = "https://github.com/d-chambers/dbscan1d"
+
+# --- formatting
+
+[tool.ruff]
+
+line-length = 88
+
+# enable certain types of linting
+lint.select = [
+    "E",
+    "F",
+    "UP",
+    "RUF",
+    "I001",
+    "D",
+    "FA",
+    "T",
+    "N",
+    "NPY",
+    "NPY201",
+]
+
+# Exclude a variety of commonly ignored directories.
+exclude = [
+    ".bzr",
+    ".direnv",
+    ".eggs",
+    ".git",
+    ".git-rewrite",
+    ".hg",
+    ".mypy_cache",
+    ".nox",
+    ".pants.d",
+    ".pytype",
+    ".ruff_cache",
+    ".svn",
+    ".tox",
+    ".venv",
+    "__pypackages__",
+    "_build",
+    "buck-out",
+    "build",
+    "dist",
+    "node_modules",
+    "venv",
+    "__init__.py"
+]
+
+# lowest python version supported
+target-version = "py310"
+
+lint.fixable = ["ALL"]
+
+# List of codes to ignore
+lint.ignore = ["D105", "D107", "D401", "D205", "D200", "D400", "N803", "N806"]
+
+[tool.ruff.lint.mccabe]
+# Unlike Flake8, default to a complexity level of 10.
+max-complexity = 10
+
+# config for docstring parsing
+[tool.ruff.lint.pydocstyle]
+convention = "numpy"
+
+[tool.pytest.ini_options]
+filterwarnings = [
+    # Ignore hdf5 warnings from pytables, See pytables #1035
+    'ignore::Warning:tables:'
+]
+
+[tool.ruff.format]
+# Use `\n` line endings for all files
+line-ending = "lf"
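
Selecting the D rules with convention = "numpy" above enforces numpydoc-layout docstrings. A hedged sketch of a docstring that should pass under that convention and the ignore list above; the cluster_1d function and its parameters are illustrative, not part of the package's API:

def cluster_1d(values, eps=0.5, min_samples=5):
    """
    Cluster 1D points and return an integer label for each one.

    Parameters
    ----------
    values : numpy.ndarray
        The points to cluster.
    eps : float
        Maximum distance between two points for one to count as the
        other's neighbor.
    min_samples : int
        Number of neighbors required for a point to be a core point.

    Returns
    -------
    numpy.ndarray
        Cluster labels; -1 marks noise points.
    """
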
41 changes: 24 additions & 17 deletions scripts/profile_dbscan1d.ipynb
@@ -18,13 +18,20 @@
 "\n",
 "import matplotlib.pyplot as plt\n",
 "import pandas as pd\n",
-"\n",
-"from dbscan1d import DBSCAN1D\n",
 "from sklearn.cluster import DBSCAN\n",
 "from sklearn.datasets import make_blobs\n",
 "\n",
-"n_points = [10, 100, 1_000, 10_000, 20_000, 30_000, 40_000,]\n",
+"from dbscan1d import DBSCAN1D\n",
+"\n",
+"n_points = [\n",
+"    10,\n",
+"    100,\n",
+"    1_000,\n",
+"    10_000,\n",
+"    20_000,\n",
+"    30_000,\n",
+"    40_000,\n",
+"]\n",
 "centers = 2"
 ]
 },
@@ -46,8 +53,8 @@
 "outputs": [],
 "source": [
 "# Profile\n",
-"db1 = DBSCAN1D(.5, 4)\n",
-"db2 = DBSCAN(.5, 4)"
+"db1 = DBSCAN1D(0.5, 4)\n",
+"db2 = DBSCAN(0.5, 4)"
 ]
 },
 {
@@ -57,16 +64,16 @@
 "outputs": [],
 "source": [
 "# profile each stream type with each function\n",
-"df = pd.DataFrame(columns=['dbscan', 'dbscan1d'], index=n_points)\n",
+"df = pd.DataFrame(columns=[\"dbscan\", \"dbscan1d\"], index=n_points)\n",
 "for n_point in n_points:\n",
-"    print(f'on {n_point}')\n",
+"    print(f\"on {n_point}\")\n",
 "    X = create_blobs(n_point, centers)\n",
-"    print('starting dbscan1d')\n",
+"    print(\"starting dbscan1d\")\n",
 "    ti1 = %timeit -o db1.fit_predict(X)\n",
-"    df.loc[n_point, 'dbscan1d'] = ti1.best\n",
-"    print('starting dbscan')\n",
+"    df.loc[n_point, \"dbscan1d\"] = ti1.best\n",
+"    print(\"starting dbscan\")\n",
 "    ti2 = %timeit -o db2.fit_predict(X)\n",
-"    df.loc[n_point, 'dbscan'] = ti2.best\n",
+"    df.loc[n_point, \"dbscan\"] = ti2.best\n",
 "    print()\n",
 "    print()"
 ]
@@ -89,20 +96,20 @@
 },
 "outputs": [],
 "source": [
-"out_path = Path(__file__).parent / 'profile_results.png'\n",
+"out_path = Path(__file__).parent / \"profile_results.png\"\n",
 "\n",
 "x = df.index.values\n",
-"plt.loglog(x, df['dbscan'].values, label='dbscan', color='r')\n",
-"plt.loglog(x, df['dbscan1d'].values, label='dbscan1d', color='b')\n",
+"plt.loglog(x, df[\"dbscan\"].values, label=\"dbscan\", color=\"r\")\n",
+"plt.loglog(x, df[\"dbscan1d\"].values, label=\"dbscan1d\", color=\"b\")\n",
 "\n",
-"plt.xlabel('number of points')\n",
-"plt.ylabel('run time (s)')\n",
+"plt.xlabel(\"number of points\")\n",
+"plt.ylabel(\"run time (s)\")\n",
 "\n",
 "plt.legend()\n",
 "\n",
 "plt.savefig(out_path)\n",
 "\n",
-"plt.show()\n"
+"plt.show()"
 ]
 }
 ],
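
For running the same comparison outside IPython, here is a plain-Python sketch of the notebook's benchmark loop; the %timeit magics above are IPython-only, so this uses time.perf_counter, and the single size below is one entry from the notebook's n_points list (everything else mirrors the cells):

import time

from sklearn.datasets import make_blobs

from dbscan1d import DBSCAN1D

X = make_blobs(10_000, n_features=1, centers=2, random_state=13)[0].flatten()
db1 = DBSCAN1D(0.5, 4)

start = time.perf_counter()
labels = db1.fit_predict(X)
elapsed = time.perf_counter() - start
print(f"dbscan1d on {X.size:,} points: {elapsed:.4f} s, {len(set(labels))} labels")
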
9 changes: 5 additions & 4 deletions src/dbscan1d/core.py
@@ -3,7 +3,8 @@
 It should be *much* more efficient for large datasets.
 """
-from typing import Optional
+
+from __future__ import annotations
 
 import numpy as np
 
@@ -17,9 +18,9 @@ class DBSCAN1D:
     """
 
     # params that change upon fit/training
-    core_sample_indices_: Optional[np.ndarray] = None
-    components_: Optional[np.ndarray] = None
-    labels_: Optional[np.ndarray] = None
+    core_sample_indices_: np.ndarray | None = None
+    components_: np.ndarray | None = None
+    labels_: np.ndarray | None = None
 
     def __init__(self, eps: float = 0.5, min_samples: int = 5, metric="euclidean"):
         self.eps = eps
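
The swap from typing.Optional to PEP 604 unions works together with the __future__ import added above: with it, annotations are stored as strings and never evaluated at runtime, so np.ndarray | None parses fine even on interpreters older than 3.10 (the FA rules selected in pyproject.toml guard exactly this). A minimal standalone illustration, with a hypothetical Example class:

from __future__ import annotations

import numpy as np


class Example:
    # Stored as the string "np.ndarray | None"; never evaluated at runtime.
    labels_: np.ndarray | None = None


print(Example.__annotations__["labels_"])  # np.ndarray | None
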
5 changes: 3 additions & 2 deletions src/dbscan1d/version.py
@@ -1,8 +1,9 @@
 """Module for reporting the version of dbscan1d."""
+
 from importlib.metadata import PackageNotFoundError, version
 
 try:
     __version__ = version("dbscan1d")
 # package is not installed
-except PackageNotFoundError:  # NOQA
-    __version__ = "0.0.0"  # NOQA
+except PackageNotFoundError:
+    __version__ = "0.0.0"
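
The version module reads the version from installed package metadata instead of hard-coding it; when the distribution is not installed (for example, code run straight from a source checkout), importlib.metadata raises PackageNotFoundError and the module falls back to a placeholder. A quick standalone check of that behavior, using a deliberately fake distribution name:

from importlib.metadata import PackageNotFoundError, version

try:
    v = version("definitely-not-an-installed-distribution")
except PackageNotFoundError:
    v = "0.0.0"
print(v)  # 0.0.0
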
14 changes: 8 additions & 6 deletions tests/test_dbscan1d.py
@@ -3,6 +3,7 @@
 Requires sklearn.
 """
+
 import copy
 from itertools import product
 from pathlib import Path
@@ -122,7 +123,8 @@ def generate_test_data(num_points, centers=None):
         num_points, n_features=1, centers=centers, random_state=13
     )
     X = blobs.flatten()
-    np.random.shuffle(X)
+    rng = np.random.default_rng()
+    rng.shuffle(X)
     return X, blob_labels
 
 
@@ -139,18 +141,18 @@ class TestSKleanEquivalent:
 
     # define a small range of dbscan input params over which tests will
     # be parametrized
-    eps_values = [0.0001, 0.1, 0.5, 1, 2]
-    min_samples_values = [1, 2, 5, 15]
-    db_params = list(product(eps_values, min_samples_values))
+    eps_values = (0.0001, 0.1, 0.5, 1, 2)
+    min_samples_values = (1, 2, 5, 15)
+    db_params = tuple(product(eps_values, min_samples_values))
 
-    centers = [
+    centers = (
         np.array([0, 5, 10]),
         np.arange(10),
         np.array([1, 2, 3, 4, 5, 10]),
         np.array([1, 1.1, 1.2, 1.3, 1.4, 1.5]),
         2,
         7,
-    ]
+    )
 
     @pytest.fixture(scope="class", params=centers)
     def blobs(self, request):
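
The test change retires the legacy np.random.shuffle global in favor of the Generator API that NumPy now recommends and that the NPY rules selected in pyproject.toml flag. A minimal sketch; seeding the generator is an assumption (the diff leaves it unseeded) but makes the shuffle reproducible:

import numpy as np

rng = np.random.default_rng(13)
X = np.arange(10, dtype=float)
rng.shuffle(X)  # in place, like the legacy np.random.shuffle
print(X)
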
