Python upgrade 39 discussion #427

Merged: 28 commits, Jun 23, 2024
56e4788 change version to 3.9 (zain-sohail, Apr 11, 2024)
3d5ffb4 update lock file (zain-sohail, Apr 11, 2024)
7b6062c add dask dataframe dep (zain-sohail, Apr 11, 2024)
084a6b0 remove uncessary test file (zain-sohail, Apr 11, 2024)
b49eded get version from toml version (zain-sohail, Apr 11, 2024)
e6f3943 fix version (zain-sohail, Apr 11, 2024)
2b4c93e fix mypy error (zain-sohail, Apr 11, 2024)
e9fa5f2 fix old pre commit file (zain-sohail, Apr 11, 2024)
65e8e09 fix tests (zain-sohail, Apr 11, 2024)
04a580a fix tests (zain-sohail, Apr 11, 2024)
510db39 merge main and update lock file (zain-sohail, Jun 20, 2024)
b456cba fix some linting errors (zain-sohail, Jun 20, 2024)
9d61149 fix issues with matplotlib (rettigl, Jun 21, 2024)
7cdf158 limit dask version and update lockfile and tests (rettigl, Jun 21, 2024)
87c4bf1 Merge branch 'python-upgrade-39' of github.com:OpenCOMPES/sed into py… (rettigl, Jun 21, 2024)
502f4d7 fruther restrict dask due to never finishing tests (rettigl, Jun 21, 2024)
4065a37 revert tests (rettigl, Jun 21, 2024)
24a353f update annotations (rettigl, Jun 21, 2024)
867a711 more typing fixes (rettigl, Jun 21, 2024)
6a4c78a more type fixes (rettigl, Jun 21, 2024)
843b16b more typing fixes (rettigl, Jun 21, 2024)
1284b36 more type fixes (rettigl, Jun 21, 2024)
3e02f48 forgotten changes (rettigl, Jun 21, 2024)
0bc1335 allow testing_multiversion.yml to run on v1 branch (zain-sohail, Jun 22, 2024)
b0644e8 try limiting python 3.12 version (rettigl, Jun 22, 2024)
030395c exclude python 3.11.9 (rettigl, Jun 22, 2024)
0f28b8d update poetry version (rettigl, Jun 22, 2024)
59d76b8 Merge pull request #428 from OpenCOMPES/py39_update_annotations (rettigl, Jun 22, 2024)
4 changes: 2 additions & 2 deletions .github/workflows/benchmark.yml
@@ -28,8 +8,8 @@ jobs:
- name: "Setup Python, Poetry and Dependencies"
uses: packetcoders/action-setup-cache-python-poetry@main
with:
python-version: 3.8
poetry-version: 1.2.2
python-version: 3.9
poetry-version: 1.8.3

# Run benchmakrs
- name: Run benchmarks on python 3.8
4 changes: 2 additions & 2 deletions .github/workflows/documentation.yml
@@ -47,8 +8,8 @@ jobs:
- name: "Setup Python, Poetry and Dependencies"
uses: packetcoders/action-setup-cache-python-poetry@main
with:
python-version: 3.8
poetry-version: 1.2.2
python-version: 3.9
poetry-version: 1.8.3

- name: Install notebook dependencies
run: poetry install -E notebook --with docs
4 changes: 2 additions & 2 deletions .github/workflows/linting.yml
@@ -19,8 +8,8 @@ jobs:
- name: "Setup Python, Poetry and Dependencies"
uses: packetcoders/action-setup-cache-python-poetry@main
with:
python-version: 3.8
poetry-version: 1.2.2
python-version: 3.9
poetry-version: 1.8.3

# Linting steps, excute all linters even if one fails
- name: ruff
8 changes: 4 additions & 4 deletions .github/workflows/release.yml
@@ -39,8 +8,8 @@ jobs:
- name: "Setup Python, Poetry and Dependencies"
uses: zain-sohail/action-setup-cache-python-poetry@main
with:
python-version: 3.8
poetry-version: 1.2.2
python-version: 3.9
poetry-version: 1.8.3
working-directory: sed-processor

- name: Change to distribution name in toml file
@@ -82,8 +8,8 @@
- name: "Setup Python, Poetry and Dependencies"
uses: zain-sohail/action-setup-cache-python-poetry@main
with:
python-version: 3.8
poetry-version: 1.2.2
python-version: 3.9
poetry-version: 1.8.3
working-directory: sed-processor

- name: Change to distribution name in toml file
6 changes: 3 additions & 3 deletions .github/workflows/testing_coverage.yml
@@ -23,11 +8,11 @@ jobs:
- name: "Setup Python, Poetry and Dependencies"
uses: packetcoders/action-setup-cache-python-poetry@main
with:
python-version: 3.8
poetry-version: 1.2.2
python-version: 3.9
poetry-version: 1.8.3

# Run pytest with coverage report, saving to xml
- name: Run tests on python 3.8
- name: Run tests on python 3.9
run: |
poetry run pytest --cov --cov-report xml:cobertura.xml --full-trace --show-capture=no -sv -n auto tests/

9 changes: 5 additions & 4 deletions .github/workflows/testing_multiversion.yml
@@ -1,9 +1,10 @@
name: unit tests [Python 3.8|3.9|3.10|3.11]
# Tests for all supported versions [Python 3.9|3.10|3.11|3.12]
name: Unit Tests

on:
workflow_dispatch:
push:
branches: [ main ]
branches: [ main, v1_feature_branch ]
paths-ignore:
pyproject.toml

@@ -12,7 +13,7 @@ jobs:
# Using matrix strategy
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11.8", "3.12.2"]
runs-on: ubuntu-latest
steps:
# Check out repo and set up Python
@@ -25,7 +26,7 @@
uses: packetcoders/action-setup-cache-python-poetry@main
with:
python-version: ${{matrix.python-version}}
poetry-version: 1.2.2
poetry-version: 1.8.3

# Use cached python and dependencies, install poetry
- name: Run tests on python ${{matrix.python-version}}
4 changes: 2 additions & 2 deletions .github/workflows/update_dependencies.yml
@@ -28,8 +8,8 @@ jobs:
- name: "Setup Python, Poetry and Dependencies"
uses: packetcoders/action-setup-cache-python-poetry@main
with:
python-version: 3.8
poetry-version: 1.2.2
python-version: 3.9
poetry-version: 1.8.3

# update poetry lockfile
- name: "Update poetry lock file"
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
@@ -27,12 +8,12 @@ repos:
rev: v3.8.2
hooks:
- id: reorder-python-imports
args: [--application-directories, '.:src', --py36-plus]
args: [--application-directories, '.:src', --py39-plus]
- repo: https://github.com/asottile/pyupgrade
rev: v2.37.3
rev: v3.16.0
hooks:
- id: pyupgrade
args: [--py36-plus]
args: [--py39-plus]
- repo: https://github.com/asottile/add-trailing-comma
rev: v2.2.3
hooks:
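The pre-commit change above bumps pyupgrade to v3.16.0 and passes --py39-plus, which lets it rewrite legacy typing aliases into the PEP 585 builtin generics that Python 3.9 supports natively. A rough, hypothetical illustration of the kind of rewrite this enables (the function is made up, not sed code):

```python
# Hypothetical module showing a pyupgrade --py39-plus rewrite:
# typing.List / typing.Dict generics become builtin generics (PEP 585).

# Before:
#   from typing import Dict, List
#   def count_events(names: List[str]) -> Dict[str, int]: ...

# After running `pyupgrade --py39-plus`:
def count_events(names: list[str]) -> dict[str, int]:
    """Count how often each event name occurs."""
    counts: dict[str, int] = {}
    for name in names:
        counts[name] = counts.get(name, 0) + 1
    return counts


print(count_events(["ev1", "ev2", "ev1"]))  # {'ev1': 2, 'ev2': 1}
```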
1,310 changes: 649 additions & 661 deletions poetry.lock

Large diffs are not rendered by default.

7 changes: 3 additions & 4 deletions pyproject.toml
@@ -13,9 +8,9 @@ keywords = ["sed", "mpes", "flash", "arpes"]
license = "MIT"

[tool.poetry.dependencies]
python = ">=3.8, <3.11.9"
python = ">=3.9, <3.12.3, !=3.11.9"
bokeh = ">=2.4.2"
dask = ">=2021.12.0"
dask = {version = ">=2021.12.0, <2023.12.1"}
fastdtw = ">=0.3.4"
h5py = ">=3.6.0"
ipympl = ">=0.9.1"
@@ -43,7 +8,6 @@ ipykernel = {version = ">=6.9.1", optional = true}
jupyterlab = {version = "^3.4.0", optional = true}
jupyterlab-h5web = {version = "^8.0.0", extras = ["full"]}


[tool.poetry.extras]
notebook = ["jupyter", "ipykernel", "jupyterlab", "jupyterlab-h5web"]
all = ["notebook"]
@@ -59,7 +58,7 @@ types-pyyaml = ">=6.0.12.12"
types-requests = ">=2.31.0.9"
pyfakefs = ">=5.3.0"
requests-mock = "^1.11.0"

pre-commit = ">=3.0.0"

[tool.poetry.group.docs]
optional = true
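For reference, the new interpreter and dask constraints in pyproject.toml can be sanity-checked with the packaging library's SpecifierSet. This is only an illustrative sketch; packaging is not declared as a dependency in this project.

```python
# Illustrative check of the new version constraints with the `packaging`
# library (assumed to be available; not part of this project's dependencies).
from packaging.specifiers import SpecifierSet

python_spec = SpecifierSet(">=3.9, <3.12.3, !=3.11.9")
dask_spec = SpecifierSet(">=2021.12.0, <2023.12.1")

print("3.11.9" in python_spec)   # False: this patch release is excluded
print("3.12.2" in python_spec)   # True: matches the 3.12.2 pin in the test matrix
print("2023.12.1" in dask_spec)  # False: the upper bound is exclusive
```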
1 change: 0 additions & 1 deletion sed/__init__.py
@@ -3,5 +3,4 @@
"""
from .core.processor import SedProcessor

__version__ = "0.1.0"
__all__ = ["SedProcessor"]
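With the hard-coded __version__ removed (see the "get version from toml version" commit), one common pattern is to resolve the version from the installed distribution metadata instead. The sketch below is an assumption, not code shown in this diff; the distribution name "sed-processor" is taken from the release workflow above.

```python
# Sketch: derive __version__ from installed package metadata instead of a
# hard-coded string. The distribution name "sed-processor" is an assumption
# based on the release workflow; sed may wire this up differently.
from importlib.metadata import PackageNotFoundError, version

try:
    __version__ = version("sed-processor")
except PackageNotFoundError:
    # e.g. running from a source checkout that was never installed
    __version__ = "0.0.0+unknown"
```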
69 changes: 28 additions & 41 deletions sed/binning/binning.py
@@ -1,12 +1,11 @@
"""This module contains the binning functions of the sed.binning module

"""
from __future__ import annotations

import gc
from collections.abc import Sequence
from functools import reduce
from typing import cast
from typing import List
from typing import Sequence
from typing import Tuple
from typing import Union

import dask.dataframe
@@ -26,27 +25,21 @@


def bin_partition(
part: Union[dask.dataframe.DataFrame, pd.DataFrame],
bins: Union[
int,
dict,
Sequence[int],
Sequence[np.ndarray],
Sequence[tuple],
] = 100,
part: dask.dataframe.DataFrame | pd.DataFrame,
bins: int | dict | Sequence[int] | Sequence[np.ndarray] | Sequence[tuple] = 100,
axes: Sequence[str] = None,
ranges: Sequence[Tuple[float, float]] = None,
ranges: Sequence[tuple[float, float]] = None,
hist_mode: str = "numba",
jitter: Union[list, dict] = None,
jitter: list | dict = None,
return_edges: bool = False,
skip_test: bool = False,
) -> Union[np.ndarray, Tuple[np.ndarray, list]]:
) -> np.ndarray | tuple[np.ndarray, list]:
"""Compute the n-dimensional histogram of a single dataframe partition.

Args:
part (Union[dask.dataframe.DataFrame, pd.DataFrame]): dataframe on which
part (dask.dataframe.DataFrame | pd.DataFrame): dataframe on which
to perform the histogram. Usually a partition of a dask DataFrame.
bins (int, dict, Sequence[int], Sequence[np.ndarray], Sequence[tuple], optional):
bins (int | dict | Sequence[int] | Sequence[np.ndarray] | Sequence[tuple], optional):
Definition of the bins. Can be any of the following cases:

- an integer describing the number of bins for all dimensions. This
@@ -70,7 +63,7 @@ def bin_partition(
the order of the dimensions in the resulting array. Only not required if
bins are provided as dictionary containing the axis names.
Defaults to None.
ranges (Sequence[Tuple[float, float]], optional): Sequence of tuples containing
ranges (Sequence[tuple[float, float]], optional): Sequence of tuples containing
the start and end point of the binning range. Required if bins given as
int or Sequence[int]. Defaults to None.
hist_mode (str, optional): Histogram calculation method.
@@ -79,7 +72,7 @@ def bin_partition(
- "numba" use a numba powered similar method.

Defaults to "numba".
jitter (Union[list, dict], optional): a list of the axes on which to apply
jitter (list | dict, optional): a list of the axes on which to apply
jittering. To specify the jitter amplitude or method (normal or uniform
noise) a dictionary can be passed. This should look like
jitter={'axis':{'amplitude':0.5,'mode':'uniform'}}.
@@ -102,8 +95,8 @@ def bin_partition(
present in the dataframe

Returns:
Union[np.ndarray, Tuple[np.ndarray, list]]: 2-element tuple returned only when
returnEdges is True. Otherwise only hist is returned.
np.ndarray | tuple[np.ndarray: 2-element tuple returned only when
return_edges is True. Otherwise only hist is returned.

- **hist**: The result of the n-dimensional binning
- **edges**: A list of D arrays describing the bin edges for each dimension.
Expand All @@ -122,17 +115,17 @@ def bin_partition(
raise TypeError(
"axes needs to be of type 'List[str]' if tests are skipped!",
)
bins = cast(Union[List[int], List[np.ndarray]], bins)
axes = cast(List[str], axes)
ranges = cast(List[Tuple[float, float]], ranges)
bins = cast(Union[list[int], list[np.ndarray]], bins)
axes = cast(list[str], axes)
ranges = cast(list[tuple[float, float]], ranges)

# convert bin centers to bin edges:
if all(isinstance(x, np.ndarray) for x in bins):
bins = cast(List[np.ndarray], bins)
bins = cast(list[np.ndarray], bins)
for i, bin_centers in enumerate(bins):
bins[i] = bin_centers_to_bin_edges(bin_centers)
else:
bins = cast(List[int], bins)
bins = cast(list[int], bins)
# shift ranges by half a bin size to align the bin centers to the given ranges,
# as the histogram functions interprete the ranges as limits for the edges.
for i, nbins in enumerate(bins):
@@ -203,18 +196,12 @@

def bin_dataframe(
df: dask.dataframe.DataFrame,
bins: Union[
int,
dict,
Sequence[int],
Sequence[np.ndarray],
Sequence[tuple],
] = 100,
bins: int | dict | Sequence[int] | Sequence[np.ndarray] | Sequence[tuple] = 100,
axes: Sequence[str] = None,
ranges: Sequence[Tuple[float, float]] = None,
ranges: Sequence[tuple[float, float]] = None,
hist_mode: str = "numba",
mode: str = "fast",
jitter: Union[list, dict] = None,
jitter: list | dict = None,
pbar: bool = True,
n_cores: int = N_CPU - 1,
threads_per_worker: int = 4,
@@ -228,7 +215,7 @@ def bin_dataframe(
Args:
df (dask.dataframe.DataFrame): a dask.DataFrame on which to perform the
histogram.
bins (int, dict, Sequence[int], Sequence[np.ndarray], Sequence[tuple], optional):
bins (int | dict | Sequence[int] | Sequence[np.ndarray] | Sequence[tuple], optional):
Definition of the bins. Can be any of the following cases:

- an integer describing the number of bins for all dimensions. This
@@ -252,7 +239,7 @@ def bin_dataframe(
the order of the dimensions in the resulting array. Only not required if
bins are provided as dictionary containing the axis names.
Defaults to None.
ranges (Sequence[Tuple[float, float]], optional): Sequence of tuples containing
ranges (Sequence[tuple[float, float]], optional): Sequence of tuples containing
the start and end point of the binning range. Required if bins given as
int or Sequence[int]. Defaults to None.
hist_mode (str, optional): Histogram calculation method.
@@ -269,7 +256,7 @@ def bin_dataframe(
- 'legacy': Single-core recombination of partition results.

Defaults to "fast".
jitter (Union[list, dict], optional): a list of the axes on which to apply
jitter (list | dict, optional): a list of the axes on which to apply
jittering. To specify the jitter amplitude or method (normal or uniform
noise) a dictionary can be passed. This should look like
jitter={'axis':{'amplitude':0.5,'mode':'uniform'}}.
@@ -304,14 +291,14 @@ def bin_dataframe(
# create the coordinate axes for the xarray output
# if provided as array, they are interpreted as bin centers
if isinstance(bins[0], np.ndarray):
bins = cast(List[np.ndarray], bins)
bins = cast(list[np.ndarray], bins)
coords = dict(zip(axes, bins))
elif ranges is None:
raise ValueError(
"bins is not an array and range is none. this shouldn't happen.",
)
else:
bins = cast(List[int], bins)
bins = cast(list[int], bins)
coords = {
ax: np.linspace(r[0], r[1], n, endpoint=False) for ax, r, n in zip(axes, ranges, bins)
}
@@ -509,7 +496,7 @@ def normalization_histogram_from_timed_dataframe(


def apply_jitter_on_column(
df: Union[dask.dataframe.core.DataFrame, pd.DataFrame],
df: dask.dataframe.core.DataFrame | pd.DataFrame,
amp: float,
col: str,
mode: str = "uniform",
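The annotation changes throughout binning.py follow one pattern: with from __future__ import annotations, signatures can use the PEP 604 X | Y and PEP 585 list[...] / tuple[...] spellings even on Python 3.9, because annotations are no longer evaluated at runtime, and Sequence now comes from collections.abc rather than typing. Expressions that are evaluated, such as the arguments to cast(), keep typing.Union (the | operator between types needs Python 3.10) while switching to the builtin generics that 3.9 does support at runtime. A minimal, hypothetical sketch of the same pattern (not sed API):

```python
# Minimal sketch of the typing pattern adopted above, runnable on Python 3.9.
from __future__ import annotations  # PEP 563: annotations are not evaluated at runtime

from typing import Union, cast

import numpy as np


def normalize_bins(bins: int | list[np.ndarray]) -> list[np.ndarray]:
    """Return bin-edge arrays, accepting either a bin count or center arrays."""
    if isinstance(bins, int):
        # PEP 585 builtin generics such as list[np.ndarray] are valid at
        # runtime from Python 3.9 on, so they may appear inside cast().
        return cast(list[np.ndarray], [np.linspace(0.0, 1.0, bins + 1)])
    # cast() evaluates its first argument, and "list[np.ndarray] | list"
    # would need Python 3.10, so typing.Union stays here (as in the diff).
    bins = cast(Union[list[np.ndarray], list], bins)
    return [np.asarray(b, dtype=float) for b in bins]


print(normalize_bins(4)[0])  # edges of 4 equal bins on [0, 1]
```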