Merge pull request #315 from OpenCOMPES/ruff_linting
Ruff linting
rettigl authored Dec 24, 2023
2 parents eb3b81a + 70c29f3 commit 28a95ca
Showing 17 changed files with 374 additions and 460 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/linting.yml
@@ -25,14 +25,14 @@ jobs:
          poetry-version: 1.2.2

      # Linting steps, execute all linters even if one fails
-     - name: pycodestyle
+     - name: ruff
        run:
-         poetry run pycodestyle --ignore=E203,E501,W503 sed tests
-     - name: pylint
+         poetry run ruff sed tests
+     - name: ruff formatting
        if: ${{ always() }}
        run:
-         poetry run pylint -j 0 --good-names=i,j,k,ex,x,y,t,k,v,ax,df,ec,mc,dc,ct --disable=fixme,too-many-branches,too-many-locals,too-many-statements,too-many-arguments,too-many-lines,too-many-public-methods,too-many-instance-attributes,too-few-public-methods sed tests
+         poetry run ruff format --check sed tests
      - name: mypy
        if: ${{ always() }}
        run:
-         poetry run mypy --ignore-missing-imports --follow-imports=silent --no-strict-optional sed tests
+         poetry run mypy sed tests
17 changes: 10 additions & 7 deletions .pre-commit-config.yaml
@@ -11,15 +11,18 @@ repos:
      - id: check-ast
      - id: check-docstring-first

-  - repo: https://github.com/psf/black
-    rev: 22.3.0
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    # Ruff version.
+    rev: v0.1.7
    hooks:
-      - id: black
-        args: [--line-length=100]
-  - repo: https://github.com/PyCQA/flake8
-    rev: 5.0.4
+      # Run the linter.
+      - id: ruff
+      # Run the formatter.
+      - id: ruff-format
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.7.1
    hooks:
-      - id: flake8
+      - id: mypy
  - repo: https://github.com/asottile/reorder_python_imports
    rev: v3.8.2
    hooks:
1 change: 1 addition & 0 deletions README.md
@@ -1,6 +1,7 @@
 Backend to handle photoelectron resolved datastreams.

 [![Documentation Status](https://github.com/OpenCOMPES/sed/actions/workflows/documentation.yml/badge.svg)](https://opencompes.github.io/sed/)
+[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
 ![](https://github.com/OpenCOMPES/sed/actions/workflows/linting.yml/badge.svg?branch=main)
 ![](https://github.com/OpenCOMPES/sed/actions/workflows/testing_multiversion.yml/badge.svg?branch=main)
 ![](https://img.shields.io/pypi/pyversions/sed-processor)
723 changes: 316 additions & 407 deletions poetry.lock

Large diffs are not rendered by default.

30 changes: 28 additions & 2 deletions pyproject.toml
@@ -52,9 +52,8 @@ pytest = "^7.0.1"
 pytest-cov = "^3.0.0"
 pytest-xdist = "^2.5.0"
 pytest-clarity = "^1.0.1"
-pylint = "^3.0.1"
+ruff = "^0.1.7"
 mypy = "^1.6.0"
-pycodestyle = "^2.11.1"
 types-pyyaml = "^6.0.12.12"
 types-requests = "^2.31.0.9"
 pyfakefs = "^5.3.0"
@@ -79,3 +78,30 @@ omit = [
     "config.py",
     "config-3.py",
 ]
+
+[tool.ruff]
+include = ["sed/*.py", "tests/*.py"]
+select = [
+    "E",  # pycodestyle
+    "W",  # pycodestyle
+    "PL", # pylint
+]
+ignore = [
+    "E701",    # Multiple statements on one line (colon)
+    "PLR0911", # Too many return statements
+    "PLR0912", # Too many branches
+    "PLR0913", # Too many arguments in function definition
+    "PLR0915", # Too many statements
+    "PLR2004", # Magic value used instead of constant
+    "PLR5501", # else-if-used
+    "PLW2901", # redefined-loop-name
+]
+fixable = ["ALL"]
+line-length = 100  # override the default line length (88 characters)
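As a rough illustration of this rule selection (a hypothetical snippet, not code from the sed repository): the "E"/"W" families cover pycodestyle checks, "PL" covers the pylint checks, and the ignore list switches off the rules the project opted out of.

import os, sys  # E401 (multiple imports on one line): flagged by the selected "E" rules

def grade(score):
    if score > 90: return "A"  # E701 would fire here, but it is on the ignore list
    if score > 75:  # PLR2004 (magic value in comparison) is likewise ignored
        return "B"
    return "C"

for line in ["a ", "b "]:
    line = line.strip()  # PLW2901 (redefined loop name) -- also ignored
    print(grade(80), line)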

+[tool.mypy]
+strict = false
+ignore_missing_imports = true
+follow_imports = "silent"
+no_strict_optional = true
+disable_error_code = "import, annotation-unchecked"
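A minimal sketch of what these relaxed mypy settings tolerate (illustrative, not repository code); ignore_missing_imports additionally silences errors for dependencies that ship no type stubs:

def total(values):  # untyped signature: mypy does not check the body by default
    acc: float = 0.0  # a local annotation inside an untyped function would normally
    for v in values:  # draw an "annotation-unchecked" note; that error code is disabled
        acc += v
    return acc

count: int = None  # accepted because no_strict_optional makes None assignable anywhere
print(total([1, 2, 3]), count)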
3 changes: 0 additions & 3 deletions sed/binning/binning.py
@@ -316,13 +316,10 @@ def bin_dataframe(

     # limit multithreading in worker threads
     with threadpool_limits(limits=threads_per_worker, user_api=threadpool_api):
-
         # Main loop for binning
         for i in tqdm(range(0, df.npartitions, n_cores), disable=not pbar):
-
             core_tasks = []  # Core-level jobs
             for j in range(0, n_cores):
-
                 partition_index = i + j
                 if partition_index >= df.npartitions:
                     break
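The three deleted blank lines above, like the similar deletions in the files that follow, presumably come from ruff format, which (like black) removes blank lines that immediately follow a block opener. A small runnable sketch of the rule, not repository code:

import io

# Before formatting, a blank line sat directly under the "with" opener:
#
#     with io.StringIO("abc") as buf:
#
#         text = buf.read()
#
# ruff format strips blank lines at the start of a block:
with io.StringIO("abc") as buf:
    text = buf.read()
print(len(text))  # -> 3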
8 changes: 0 additions & 8 deletions sed/calibrator/energy.py
@@ -461,7 +461,6 @@ def add_ranges(
         newranges: List[Tuple] = []

         for i in range(self.ntraces):
-
             pathcorr = find_correspondence(
                 traces[ref_id, :],
                 traces[i, :],
@@ -651,7 +650,6 @@ def view( # pylint: disable=dangerous-default-value
         sign = 1 if energy_scale == "kinetic" else -1

         if backend == "matplotlib":
-
             figsize = kwds.pop("figsize", (12, 4))
             fig, ax = plt.subplots(figsize=figsize)
             for itr, trace in enumerate(traces):
@@ -704,7 +702,6 @@ def view( # pylint: disable=dangerous-default-value
             ax.set_title(ttl)

         elif backend == "bokeh":
-
             output_notebook(hide_banner=True)
             colors = it.cycle(ColorCycle[10])
             ttp = [("(x, y)", "($x, $y)")]
@@ -1767,7 +1764,6 @@ def normspec(
     specnorm = []

     for i in range(nspec):
-
         spec = specs[i]

         if smooth:
@@ -1879,7 +1875,6 @@ def peaksearch(
         plt.figure(figsize=(10, 4))

     for rng, trace in zip(ranges, traces.tolist()):
-
         cond = (tof >= rng[0]) & (tof <= rng[1])
         trace = np.array(trace).ravel()
         tofseg, trseg = tof[cond], trace[cond]
@@ -1991,7 +1986,6 @@ def peakdetect1d(
     for index, (x, y) in enumerate(
         zip(x_axis[:-lookahead], y_axis[:-lookahead]),
     ):
-
         if y > _max:
             _max = y
             _max_pos = x
@@ -2005,7 +1999,6 @@
         # Maxima peak candidate found
         # look ahead in signal to ensure that this is a peak and not jitter
         if y_axis[index : index + lookahead].max() < _max:
-
             max_peaks.append([_max_pos, _max])
             dump.append(True)
             # Set algorithm to only find minima now
@@ -2025,7 +2018,6 @@
         # Minima peak candidate found
         # look ahead in signal to ensure that this is a peak and not jitter
         if y_axis[index : index + lookahead].min() > _min:
-
             min_peaks.append([_min_pos, _min])
             dump.append(False)
             # Set algorithm to only find maxima now
5 changes: 0 additions & 5 deletions sed/calibrator/momentum.py
@@ -250,7 +250,6 @@ def update(plane: int, width: int):
         )

         def apply_fun(apply: bool):  # pylint: disable=unused-argument
-
             start = plane_slider.value
             stop = plane_slider.value + width_slider.value

@@ -439,7 +438,6 @@ def feature_extract(
             raise ValueError("No image loaded for feature extraction!")

         if feature_type == "points":
-
             # Detect the point landmarks
             self.peaks = po.peakdetect2d(image, **kwds)

@@ -551,7 +549,6 @@ def onclick(event):
         cid = fig.canvas.mpl_connect("button_press_event", onclick)

         def apply_func(apply: bool):  # pylint: disable=unused-argument
-
             fig.canvas.mpl_disconnect(cid)

             point_no_input.close()
@@ -1256,7 +1253,6 @@ def view( # pylint: disable=dangerous-default-value
             )

         elif backend == "bokeh":
-
             output_notebook(hide_banner=True)
             colors = it.cycle(ColorCycle[10])
             ttp = [("(x, y)", "($x, $y)")]
@@ -1281,7 +1277,6 @@

         if annotated is True:
             for p_keys, p_vals in points.items():
-
                 try:
                     xcirc, ycirc = p_vals[:, 0], p_vals[:, 1]
                     fig.scatter(
2 changes: 1 addition & 1 deletion sed/core/dfops.py
@@ -64,7 +64,7 @@ def apply_jitter(
     # jitter sizes that don't match the original bin sizes
     jitter = np.random.standard_normal(size=colsize)

-    for (col, col_jittered, amp) in zip(cols, cols_jittered, amps):
+    for col, col_jittered, amp in zip(cols, cols_jittered, amps):
         df[col_jittered] = df[col] + amp * jitter

     return df
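This change, repeated in sed/core/processor.py and sed/loader/mirrorutil.py below, drops the redundant parentheses around tuple targets in for loops, presumably the formatter's preference; both spellings are equivalent. An illustrative, runnable sketch:

pairs = [("x", 1), ("y", 2)]
for name, value in pairs:  # formerly: for (name, value) in pairs:
    print(name, value)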
4 changes: 1 addition & 3 deletions sed/core/processor.py
@@ -827,7 +827,6 @@ def apply_momentum_calibration(
             preview (bool): Option to preview the first elements of the data frame.
         """
         if self._dataframe is not None:
-
             print("Adding kx/ky columns to dataframe:")
             self._dataframe, metadata = self.mc.append_k_axis(
                 df=self._dataframe,
@@ -1049,7 +1048,6 @@ def load_bias_series(
             self.ec.load_data(biases=biases, traces=traces, tof=tof)

         elif data_files is not None:
-
             self.ec.bin_data(
                 data_files=cast(List[str], self.cpy(data_files)),
                 axes=axes,
@@ -1250,7 +1248,7 @@ def save_energy_calibration(
             filename = "sed_config.yaml"
         calibration = {}
         try:
-            for (key, value) in self.ec.calibration.items():
+            for key, value in self.ec.calibration.items():
                 if key in ["axis", "refid", "Tmat", "bvec"]:
                     continue
                 if key == "energy_scale":
4 changes: 0 additions & 4 deletions sed/diagnostics.py
@@ -86,15 +86,13 @@ def grid_histogram(
     figsz = kwds.pop("figsize", (14, 8))

     if backend == "matplotlib":
-
         nrv = len(rvs)
         nrow = int(np.ceil(nrv / ncol))
         histtype = kwds.pop("histtype", "step")

         fig, ax = plt.subplots(nrow, ncol, figsize=figsz)
         otherax = ax.copy()
         for i, zipped in enumerate(zip(rvs, rvbins, rvranges)):
-
             # Make each histogram plot
             rvname, rvbin, rvrg = zipped
             try:
@@ -131,12 +129,10 @@ def grid_histogram(
             fig.delaxes(oax)

     elif backend == "bokeh":
-
         output_notebook(hide_banner=True)

         plots = []
         for i, zipped in enumerate(zip(rvs, rvbins, rvranges)):
-
             rvname, rvbin, rvrg = zipped
             histvals, edges = np.histogram(dct[rvname], bins=rvbin, range=rvrg)
1 change: 0 additions & 1 deletion sed/io/hdf5.py
@@ -94,7 +94,6 @@ def to_h5(data: xr.DataArray, faddr: str, mode: str = "w"):
             saving.
     """
     with h5py.File(faddr, mode) as h5_file:
-
         print(f"saving data to {faddr}")

         # Saving data, make a single dataset
1 change: 0 additions & 1 deletion sed/loader/flash/loader.py
@@ -45,7 +45,6 @@ class FlashLoader(BaseLoader):
     supported_file_types = ["h5"]

     def __init__(self, config: dict) -> None:
-
         super().__init__(config=config)
         self.multi_index = ["trainId", "pulseId", "electronId"]
         self.index_per_electron: MultiIndex = None
4 changes: 2 additions & 2 deletions sed/loader/mirrorutil.py
@@ -225,7 +225,7 @@ def cleanup_oldest_scan(
     total_size = 0
     for scan in scan_dirs:
         size = 0
-        for (path, dirs, filenames) in os.walk(  # pylint: disable=W0612
+        for path, dirs, filenames in os.walk(  # pylint: disable=W0612
             scan,
         ):
             for sfile in filenames:
@@ -240,7 +240,7 @@
     oldest_scan = None
     for scan in scan_dirs:
         size = 0
-        for (path, dirs, filenames) in os.walk(  # pylint: disable=W0612
+        for path, dirs, filenames in os.walk(  # pylint: disable=W0612
             scan,
         ):
             for sfile in filenames:
12 changes: 6 additions & 6 deletions sed/loader/mpes/loader.py
@@ -7,12 +7,14 @@
 import glob
 import json
 import os
-import urllib
 from typing import Dict
 from typing import List
 from typing import Sequence
 from typing import Tuple
 from typing import Union
+from urllib.error import HTTPError
+from urllib.error import URLError
+from urllib.request import urlopen

 import dask
 import dask.array as da
@@ -255,7 +257,6 @@ def hdf5_to_array(
     # Read out groups:
     data_list = []
     for group in group_names:
-
         g_dataset = np.asarray(h5file[group])
         if bool(data_type):
             g_dataset = g_dataset.astype(data_type)
@@ -341,7 +342,6 @@ def hdf5_to_timed_array(
     data_list = []
     ms_marker = np.asarray(h5file[ms_markers_group])
     for group in group_names:
-
         g_dataset = np.asarray(h5file[group])
         if bool(data_type):
             g_dataset = g_dataset.astype(data_type)
@@ -461,7 +461,7 @@ def get_archiver_data(
     iso_from = datetime.datetime.utcfromtimestamp(ts_from).isoformat()
     iso_to = datetime.datetime.utcfromtimestamp(ts_to).isoformat()
     req_str = archiver_url + archiver_channel + "&from=" + iso_from + "Z&to=" + iso_to + "Z"
-    with urllib.request.urlopen(req_str) as req:
+    with urlopen(req_str) as req:
         data = json.load(req)
     secs = [x["secs"] + x["nanos"] * 1e-9 for x in data[0]["data"]]
     vals = [x["val"] for x in data[0]["data"]]
@@ -767,14 +767,14 @@ def gather_metadata(
                 print(
                     f"Data for channel {channel} doesn't exist for time {start}",
                 )
-            except urllib.error.HTTPError as exc:
+            except HTTPError as exc:
                 print(
                     f"Incorrect URL for the archive channel {channel}. "
                     "Make sure that the channel name and file start and end times are "
                     "correct.",
                 )
                 print("Error code: ", exc)
-            except urllib.error.URLError as exc:
+            except URLError as exc:
                 print(
                     f"Cannot access the archive URL for channel {channel}. "
                     f"Make sure that you are within the FHI network."
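The import rewrite above is more than style: a bare import urllib does not load the urllib.request and urllib.error submodules, so attribute access like urllib.request.urlopen only worked if some other module had already imported them. The explicit form is self-sufficient. A minimal runnable sketch of the adopted pattern (the URL is hypothetical):

from urllib.error import HTTPError, URLError
from urllib.request import urlopen

try:
    with urlopen("https://example.org/data.json") as response:
        payload = response.read()
        print(len(payload))
except HTTPError as exc:  # the server answered with an error status
    print("HTTP error:", exc.code)
except URLError as exc:  # network-level failure (DNS, refused connection, ...)
    print("URL error:", exc.reason)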
1 change: 0 additions & 1 deletion sed/loader/sxp/loader.py
@@ -46,7 +46,6 @@ class SXPLoader(BaseLoader):
     supported_file_types = ["h5"]

     def __init__(self, config: dict) -> None:
-
         super().__init__(config=config)
         self.multi_index = ["trainId", "pulseId", "electronId"]
         self.index_per_electron: MultiIndex = None